// Copyright 2025 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// Package pomxmlnet extracts Maven's pom.xml format with transitive dependency resolution.
package pomxmlnet

import (
	"context"
	"fmt"
	"maps"
	"path/filepath"
	"slices"
	"strings"

	"deps.dev/util/maven"
	"deps.dev/util/resolve"
	mavenresolve "deps.dev/util/resolve/maven"
	"github.com/google/osv-scalibr/clients/datasource"
	"github.com/google/osv-scalibr/clients/resolution"
	"github.com/google/osv-scalibr/extractor"
	"github.com/google/osv-scalibr/extractor/filesystem"
	"github.com/google/osv-scalibr/extractor/filesystem/language/java/javalockfile"
	"github.com/google/osv-scalibr/internal/mavenutil"
	"github.com/google/osv-scalibr/inventory"
	"github.com/google/osv-scalibr/plugin"
	"github.com/google/osv-scalibr/purl"

	cpb "github.com/google/osv-scalibr/binary/proto/config_go_proto"
)

const (
	// Name is the unique name of this extractor.
	Name = "java/pomxmlnet"
)

// Extractor extracts Maven packages with transitive dependency resolution.
type Extractor struct {
	// DepClient is the resolve client that Extract wraps in an override client
	// and hands to the Maven resolver. When it also implements
	// resolution.ClientWithRegistries, Extract passes it the registries known to
	// MavenClient.
	DepClient resolve.Client
	// MavenClient is the Maven registry API client used while merging parent
	// POMs and fetching imported dependency management. Extract calls
	// WithoutRegistries on it and then registers the repositories declared in
	// the pom.xml being scanned.
	MavenClient *datasource.MavenRegistryAPIClient
}

// New makes a new pom.xml transitive extractor with the given config.
54 func New(cfg *cpb.PluginConfig) filesystem.Extractor { 55 upstreamRegistry := "" 56 specific := plugin.FindConfig(cfg, func(c *cpb.PluginSpecificConfig) *cpb.POMXMLNetConfig { return c.GetPomXmlNet() }) 57 if specific != nil { 58 upstreamRegistry = specific.UpstreamRegistry 59 } 60 61 // No need to check errors since we are using the default Maven Central URL. 62 mavenClient, _ := datasource.NewMavenRegistryAPIClient(context.Background(), datasource.MavenRegistry{ 63 URL: upstreamRegistry, 64 ReleasesEnabled: true, 65 }, cfg.LocalRegistry, cfg.DisableGoogleAuth) 66 depClient := resolution.NewMavenRegistryClientWithAPI(mavenClient) 67 68 return &Extractor{ 69 DepClient: depClient, 70 MavenClient: mavenClient, 71 } 72 } 73 74 // Name of the extractor. 75 func (e Extractor) Name() string { return Name } 76 77 // Version of the extractor. 78 func (e Extractor) Version() int { return 0 } 79 80 // Requirements of the extractor. 81 func (e Extractor) Requirements() *plugin.Capabilities { 82 return &plugin.Capabilities{ 83 Network: plugin.NetworkOnline, 84 DirectFS: true, 85 } 86 } 87 88 // FileRequired returns true if the specified file matches Maven POM lockfile patterns. 89 func (e Extractor) FileRequired(fapi filesystem.FileAPI) bool { 90 return filepath.Base(fapi.Path()) == "pom.xml" 91 } 92 93 // Extract extracts packages from pom.xml files passed through the scan input. 94 func (e Extractor) Extract(ctx context.Context, input *filesystem.ScanInput) (inventory.Inventory, error) { 95 var project maven.Project 96 if err := datasource.NewMavenDecoder(input.Reader).Decode(&project); err != nil { 97 return inventory.Inventory{}, fmt.Errorf("could not extract: %w", err) 98 } 99 // Empty JDK and ActivationOS indicates merging the default profiles. 
100 if err := project.MergeProfiles("", maven.ActivationOS{}); err != nil { 101 return inventory.Inventory{}, fmt.Errorf("failed to merge profiles: %w", err) 102 } 103 // Interpolate the repositories so that properties are resolved. 104 if err := project.InterpolateRepositories(); err != nil { 105 return inventory.Inventory{}, fmt.Errorf("failed to interpolate project: %w", err) 106 } 107 // Clear the registries that may be from other extraction. 108 e.MavenClient = e.MavenClient.WithoutRegistries() 109 for _, repo := range project.Repositories { 110 if repo.URL.ContainsProperty() { 111 continue 112 } 113 if err := e.MavenClient.AddRegistry(ctx, datasource.MavenRegistry{ 114 URL: string(repo.URL), 115 ID: string(repo.ID), 116 ReleasesEnabled: repo.Releases.Enabled.Boolean(), 117 SnapshotsEnabled: repo.Snapshots.Enabled.Boolean(), 118 }); err != nil { 119 return inventory.Inventory{}, fmt.Errorf("failed to add registry %s: %w", repo.URL, err) 120 } 121 } 122 // Merging parents data by parsing local parent pom.xml or fetching from upstream. 
123 if err := mavenutil.MergeParents(ctx, project.Parent, &project, mavenutil.Options{ 124 Input: input, 125 Client: e.MavenClient, 126 AddRegistry: true, 127 AllowLocal: true, 128 InitialParentIndex: 1, 129 }); err != nil { 130 return inventory.Inventory{}, fmt.Errorf("failed to merge parents: %w", err) 131 } 132 // Process the dependencies: 133 // - dedupe dependencies and dependency management 134 // - import dependency management 135 // - fill in missing dependency version requirement 136 project.ProcessDependencies(func(groupID, artifactID, version maven.String) (maven.DependencyManagement, error) { 137 return mavenutil.GetDependencyManagement(ctx, e.MavenClient, groupID, artifactID, version) 138 }) 139 140 if registries := e.MavenClient.GetRegistries(); len(registries) > 0 { 141 clientRegs := make([]resolution.Registry, len(registries)) 142 for i, reg := range registries { 143 clientRegs[i] = reg 144 } 145 if cl, ok := e.DepClient.(resolution.ClientWithRegistries); ok { 146 if err := cl.AddRegistries(ctx, clientRegs); err != nil { 147 return inventory.Inventory{}, err 148 } 149 } 150 } 151 152 overrideClient := resolution.NewOverrideClient(e.DepClient) 153 resolver := mavenresolve.NewResolver(overrideClient) 154 155 // Resolve the dependencies. 
156 root := resolve.Version{ 157 VersionKey: resolve.VersionKey{ 158 PackageKey: resolve.PackageKey{ 159 System: resolve.Maven, 160 Name: project.ProjectKey.Name(), 161 }, 162 VersionType: resolve.Concrete, 163 Version: string(project.Version), 164 }} 165 reqs := make([]resolve.RequirementVersion, len(project.Dependencies)+len(project.DependencyManagement.Dependencies)) 166 for i, d := range project.Dependencies { 167 reqs[i] = resolve.RequirementVersion{ 168 VersionKey: resolve.VersionKey{ 169 PackageKey: resolve.PackageKey{ 170 System: resolve.Maven, 171 Name: d.Name(), 172 }, 173 VersionType: resolve.Requirement, 174 Version: string(d.Version), 175 }, 176 Type: resolve.MavenDepType(d, ""), 177 } 178 } 179 for i, d := range project.DependencyManagement.Dependencies { 180 reqs[len(project.Dependencies)+i] = resolve.RequirementVersion{ 181 VersionKey: resolve.VersionKey{ 182 PackageKey: resolve.PackageKey{ 183 System: resolve.Maven, 184 Name: d.Name(), 185 }, 186 VersionType: resolve.Requirement, 187 Version: string(d.Version), 188 }, 189 Type: resolve.MavenDepType(d, mavenutil.OriginManagement), 190 } 191 } 192 overrideClient.AddVersion(root, reqs) 193 194 g, err := resolver.Resolve(ctx, root.VersionKey) 195 if err != nil { 196 return inventory.Inventory{}, fmt.Errorf("failed resolving %v: %w", root, err) 197 } 198 if len(g.Nodes) <= 1 && g.Error != "" { 199 // Multi-registry error may be appended to the resolved graph so only return error when the graph is empty. 200 return inventory.Inventory{}, fmt.Errorf("failed resolving %v: %s", root, g.Error) 201 } 202 203 details := map[string]*extractor.Package{} 204 for i := 1; i < len(g.Nodes); i++ { 205 // Ignore the first node which is the root. 
206 node := g.Nodes[i] 207 depGroups := []string{} 208 groupID, artifactID, _ := strings.Cut(node.Version.Name, ":") 209 // We are only able to know dependency groups of direct dependencies but 210 // not transitive dependencies because the nodes in the resolve graph does 211 // not have the scope information. 212 isDirect := false 213 for _, dep := range project.Dependencies { 214 if dep.Name() != node.Version.Name { 215 continue 216 } 217 isDirect = true 218 if dep.Scope != "" && dep.Scope != "compile" { 219 depGroups = append(depGroups, string(dep.Scope)) 220 } 221 break 222 } 223 pkg := extractor.Package{ 224 Name: node.Version.Name, 225 Version: node.Version.Version, 226 PURLType: purl.TypeMaven, 227 Metadata: &javalockfile.Metadata{ 228 ArtifactID: artifactID, 229 GroupID: groupID, 230 DepGroupVals: depGroups, 231 IsTransitive: !isDirect, 232 }, 233 // TODO(#408): Add merged paths in here as well 234 Locations: []string{input.Path}, 235 } 236 details[pkg.Name] = &pkg 237 } 238 239 return inventory.Inventory{Packages: slices.Collect(maps.Values(details))}, nil 240 } 241 242 var _ filesystem.Extractor = Extractor{}