github.com/google/osv-scalibr@v0.4.1/extractor/filesystem/language/java/pomxmlnet/pomxmlnet.go (about)

     1  // Copyright 2025 Google LLC
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  // Package pomxmlnet extracts Maven's pom.xml format with transitive dependency resolution.
    16  package pomxmlnet
    17  
    18  import (
    19  	"context"
    20  	"fmt"
    21  	"maps"
    22  	"path/filepath"
    23  	"slices"
    24  	"strings"
    25  
    26  	"deps.dev/util/maven"
    27  	"deps.dev/util/resolve"
    28  	mavenresolve "deps.dev/util/resolve/maven"
    29  	"github.com/google/osv-scalibr/clients/datasource"
    30  	"github.com/google/osv-scalibr/clients/resolution"
    31  	"github.com/google/osv-scalibr/extractor"
    32  	"github.com/google/osv-scalibr/extractor/filesystem"
    33  	"github.com/google/osv-scalibr/extractor/filesystem/language/java/javalockfile"
    34  	"github.com/google/osv-scalibr/internal/mavenutil"
    35  	"github.com/google/osv-scalibr/inventory"
    36  	"github.com/google/osv-scalibr/plugin"
    37  	"github.com/google/osv-scalibr/purl"
    38  
    39  	cpb "github.com/google/osv-scalibr/binary/proto/config_go_proto"
    40  )
    41  
    42  const (
    43  	// Name is the unique name of this extractor.
    44  	Name = "java/pomxmlnet"
    45  )
    46  
// Extractor extracts Maven packages with transitive dependency resolution.
type Extractor struct {
	// DepClient is the resolution client that the dependency resolver is built
	// on; Extract also registers the project's registries on it when it
	// implements resolution.ClientWithRegistries.
	DepClient resolve.Client
	// MavenClient is the Maven registry client used to merge parent POMs and
	// fetch imported dependency management.
	MavenClient *datasource.MavenRegistryAPIClient
}
    52  
    53  // New makes a new pom.xml transitive extractor with the given config.
    54  func New(cfg *cpb.PluginConfig) filesystem.Extractor {
    55  	upstreamRegistry := ""
    56  	specific := plugin.FindConfig(cfg, func(c *cpb.PluginSpecificConfig) *cpb.POMXMLNetConfig { return c.GetPomXmlNet() })
    57  	if specific != nil {
    58  		upstreamRegistry = specific.UpstreamRegistry
    59  	}
    60  
    61  	// No need to check errors since we are using the default Maven Central URL.
    62  	mavenClient, _ := datasource.NewMavenRegistryAPIClient(context.Background(), datasource.MavenRegistry{
    63  		URL:             upstreamRegistry,
    64  		ReleasesEnabled: true,
    65  	}, cfg.LocalRegistry, cfg.DisableGoogleAuth)
    66  	depClient := resolution.NewMavenRegistryClientWithAPI(mavenClient)
    67  
    68  	return &Extractor{
    69  		DepClient:   depClient,
    70  		MavenClient: mavenClient,
    71  	}
    72  }
    73  
    74  // Name of the extractor.
    75  func (e Extractor) Name() string { return Name }
    76  
    77  // Version of the extractor.
    78  func (e Extractor) Version() int { return 0 }
    79  
    80  // Requirements of the extractor.
    81  func (e Extractor) Requirements() *plugin.Capabilities {
    82  	return &plugin.Capabilities{
    83  		Network:  plugin.NetworkOnline,
    84  		DirectFS: true,
    85  	}
    86  }
    87  
    88  // FileRequired returns true if the specified file matches Maven POM lockfile patterns.
    89  func (e Extractor) FileRequired(fapi filesystem.FileAPI) bool {
    90  	return filepath.Base(fapi.Path()) == "pom.xml"
    91  }
    92  
    93  // Extract extracts packages from pom.xml files passed through the scan input.
    94  func (e Extractor) Extract(ctx context.Context, input *filesystem.ScanInput) (inventory.Inventory, error) {
    95  	var project maven.Project
    96  	if err := datasource.NewMavenDecoder(input.Reader).Decode(&project); err != nil {
    97  		return inventory.Inventory{}, fmt.Errorf("could not extract: %w", err)
    98  	}
    99  	// Empty JDK and ActivationOS indicates merging the default profiles.
   100  	if err := project.MergeProfiles("", maven.ActivationOS{}); err != nil {
   101  		return inventory.Inventory{}, fmt.Errorf("failed to merge profiles: %w", err)
   102  	}
   103  	// Interpolate the repositories so that properties are resolved.
   104  	if err := project.InterpolateRepositories(); err != nil {
   105  		return inventory.Inventory{}, fmt.Errorf("failed to interpolate project: %w", err)
   106  	}
   107  	// Clear the registries that may be from other extraction.
   108  	e.MavenClient = e.MavenClient.WithoutRegistries()
   109  	for _, repo := range project.Repositories {
   110  		if repo.URL.ContainsProperty() {
   111  			continue
   112  		}
   113  		if err := e.MavenClient.AddRegistry(ctx, datasource.MavenRegistry{
   114  			URL:              string(repo.URL),
   115  			ID:               string(repo.ID),
   116  			ReleasesEnabled:  repo.Releases.Enabled.Boolean(),
   117  			SnapshotsEnabled: repo.Snapshots.Enabled.Boolean(),
   118  		}); err != nil {
   119  			return inventory.Inventory{}, fmt.Errorf("failed to add registry %s: %w", repo.URL, err)
   120  		}
   121  	}
   122  	// Merging parents data by parsing local parent pom.xml or fetching from upstream.
   123  	if err := mavenutil.MergeParents(ctx, project.Parent, &project, mavenutil.Options{
   124  		Input:              input,
   125  		Client:             e.MavenClient,
   126  		AddRegistry:        true,
   127  		AllowLocal:         true,
   128  		InitialParentIndex: 1,
   129  	}); err != nil {
   130  		return inventory.Inventory{}, fmt.Errorf("failed to merge parents: %w", err)
   131  	}
   132  	// Process the dependencies:
   133  	//  - dedupe dependencies and dependency management
   134  	//  - import dependency management
   135  	//  - fill in missing dependency version requirement
   136  	project.ProcessDependencies(func(groupID, artifactID, version maven.String) (maven.DependencyManagement, error) {
   137  		return mavenutil.GetDependencyManagement(ctx, e.MavenClient, groupID, artifactID, version)
   138  	})
   139  
   140  	if registries := e.MavenClient.GetRegistries(); len(registries) > 0 {
   141  		clientRegs := make([]resolution.Registry, len(registries))
   142  		for i, reg := range registries {
   143  			clientRegs[i] = reg
   144  		}
   145  		if cl, ok := e.DepClient.(resolution.ClientWithRegistries); ok {
   146  			if err := cl.AddRegistries(ctx, clientRegs); err != nil {
   147  				return inventory.Inventory{}, err
   148  			}
   149  		}
   150  	}
   151  
   152  	overrideClient := resolution.NewOverrideClient(e.DepClient)
   153  	resolver := mavenresolve.NewResolver(overrideClient)
   154  
   155  	// Resolve the dependencies.
   156  	root := resolve.Version{
   157  		VersionKey: resolve.VersionKey{
   158  			PackageKey: resolve.PackageKey{
   159  				System: resolve.Maven,
   160  				Name:   project.ProjectKey.Name(),
   161  			},
   162  			VersionType: resolve.Concrete,
   163  			Version:     string(project.Version),
   164  		}}
   165  	reqs := make([]resolve.RequirementVersion, len(project.Dependencies)+len(project.DependencyManagement.Dependencies))
   166  	for i, d := range project.Dependencies {
   167  		reqs[i] = resolve.RequirementVersion{
   168  			VersionKey: resolve.VersionKey{
   169  				PackageKey: resolve.PackageKey{
   170  					System: resolve.Maven,
   171  					Name:   d.Name(),
   172  				},
   173  				VersionType: resolve.Requirement,
   174  				Version:     string(d.Version),
   175  			},
   176  			Type: resolve.MavenDepType(d, ""),
   177  		}
   178  	}
   179  	for i, d := range project.DependencyManagement.Dependencies {
   180  		reqs[len(project.Dependencies)+i] = resolve.RequirementVersion{
   181  			VersionKey: resolve.VersionKey{
   182  				PackageKey: resolve.PackageKey{
   183  					System: resolve.Maven,
   184  					Name:   d.Name(),
   185  				},
   186  				VersionType: resolve.Requirement,
   187  				Version:     string(d.Version),
   188  			},
   189  			Type: resolve.MavenDepType(d, mavenutil.OriginManagement),
   190  		}
   191  	}
   192  	overrideClient.AddVersion(root, reqs)
   193  
   194  	g, err := resolver.Resolve(ctx, root.VersionKey)
   195  	if err != nil {
   196  		return inventory.Inventory{}, fmt.Errorf("failed resolving %v: %w", root, err)
   197  	}
   198  	if len(g.Nodes) <= 1 && g.Error != "" {
   199  		// Multi-registry error may be appended to the resolved graph so only return error when the graph is empty.
   200  		return inventory.Inventory{}, fmt.Errorf("failed resolving %v: %s", root, g.Error)
   201  	}
   202  
   203  	details := map[string]*extractor.Package{}
   204  	for i := 1; i < len(g.Nodes); i++ {
   205  		// Ignore the first node which is the root.
   206  		node := g.Nodes[i]
   207  		depGroups := []string{}
   208  		groupID, artifactID, _ := strings.Cut(node.Version.Name, ":")
   209  		// We are only able to know dependency groups of direct dependencies but
   210  		// not transitive dependencies because the nodes in the resolve graph does
   211  		// not have the scope information.
   212  		isDirect := false
   213  		for _, dep := range project.Dependencies {
   214  			if dep.Name() != node.Version.Name {
   215  				continue
   216  			}
   217  			isDirect = true
   218  			if dep.Scope != "" && dep.Scope != "compile" {
   219  				depGroups = append(depGroups, string(dep.Scope))
   220  			}
   221  			break
   222  		}
   223  		pkg := extractor.Package{
   224  			Name:     node.Version.Name,
   225  			Version:  node.Version.Version,
   226  			PURLType: purl.TypeMaven,
   227  			Metadata: &javalockfile.Metadata{
   228  				ArtifactID:   artifactID,
   229  				GroupID:      groupID,
   230  				DepGroupVals: depGroups,
   231  				IsTransitive: !isDirect,
   232  			},
   233  			// TODO(#408): Add merged paths in here as well
   234  			Locations: []string{input.Path},
   235  		}
   236  		details[pkg.Name] = &pkg
   237  	}
   238  
   239  	return inventory.Inventory{Packages: slices.Collect(maps.Values(details))}, nil
   240  }
   241  
// Compile-time assertion that Extractor implements filesystem.Extractor.
var _ filesystem.Extractor = Extractor{}