github.com/google/osv-scalibr@v0.4.1/extractor/filesystem/language/java/pomxml/pomxml.go (about)

     1  // Copyright 2025 Google LLC
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  // Package pomxml extracts pom.xml files.
    16  package pomxml
    17  
    18  import (
    19  	"context"
    20  	"encoding/xml"
    21  	"fmt"
    22  	"maps"
    23  	"path/filepath"
    24  	"regexp"
    25  	"slices"
    26  	"strings"
    27  
    28  	"deps.dev/util/maven"
    29  
    30  	"github.com/google/osv-scalibr/extractor"
    31  	"github.com/google/osv-scalibr/extractor/filesystem"
    32  	"github.com/google/osv-scalibr/extractor/filesystem/language/java/javalockfile"
    33  	"github.com/google/osv-scalibr/internal/mavenutil"
    34  	"github.com/google/osv-scalibr/inventory"
    35  	"github.com/google/osv-scalibr/log"
    36  	"github.com/google/osv-scalibr/plugin"
    37  	"github.com/google/osv-scalibr/purl"
    38  )
    39  
    40  const (
    41  	// Name is the unique name of this extractor.
    42  	Name = "java/pomxml"
    43  )
    44  
    45  // "Constant" at the top to compile this regex only once.
    46  var (
    47  	versionRequirementReg = regexp.MustCompile(`[[(]?(.*?)(?:,|[)\]]|$)`)
    48  )
    49  
    50  func parseResolvedVersion(version maven.String) string {
    51  	results := versionRequirementReg.FindStringSubmatch(string(version))
    52  	// First capture group will always exist, but might be empty, therefore the slice will always
    53  	// have a length of 2.
    54  	if results == nil || results[1] == "" {
    55  		return ""
    56  	}
    57  
    58  	return results[1]
    59  }
    60  
// Extractor extracts Maven packages from pom.xml files.
// It carries no fields, so the zero value is ready to use.
type Extractor struct{}
    63  
    64  // New returns a new instance of the extractor.
    65  func New() filesystem.Extractor { return &Extractor{} }
    66  
    67  // Name of the extractor
    68  func (e Extractor) Name() string { return Name }
    69  
    70  // Version of the extractor
    71  func (e Extractor) Version() int { return 0 }
    72  
    73  // Requirements of the extractor
    74  func (e Extractor) Requirements() *plugin.Capabilities {
    75  	return &plugin.Capabilities{Network: plugin.NetworkOffline}
    76  }
    77  
    78  // FileRequired returns true if the specified file matches Maven POM lockfile patterns.
    79  func (e Extractor) FileRequired(api filesystem.FileAPI) bool {
    80  	return filepath.Base(api.Path()) == "pom.xml" || filepath.Ext(api.Path()) == ".pom"
    81  }
    82  
    83  // Extract extracts packages from pom.xml files passed through the scan input.
    84  func (e Extractor) Extract(ctx context.Context, input *filesystem.ScanInput) (inventory.Inventory, error) {
    85  	var project *maven.Project
    86  
    87  	if err := xml.NewDecoder(input.Reader).Decode(&project); err != nil {
    88  		err := fmt.Errorf("could not extract pom from %s: %w", input.Path, err)
    89  		log.Errorf(err.Error())
    90  		return inventory.Inventory{}, err
    91  	}
    92  	if err := project.Interpolate(); err != nil {
    93  		err := fmt.Errorf("failed to interpolate pom for %s in %s: %w", project.Name, input.Path, err)
    94  		log.Errorf(err.Error())
    95  		return inventory.Inventory{}, err
    96  	}
    97  
    98  	// Merging parents data by parsing local parent pom.xml.
    99  	if err := mavenutil.MergeParents(ctx, project.Parent, project, mavenutil.Options{
   100  		Input:              input,
   101  		AllowLocal:         true,
   102  		InitialParentIndex: 1,
   103  	}); err != nil {
   104  		err := fmt.Errorf("failed to merge parents for %s in %s: %w", project.Name, input.Path, err)
   105  		log.Errorf(err.Error())
   106  		return inventory.Inventory{}, err
   107  	}
   108  	// Process the dependencies:
   109  	//  - dedupe dependencies and dependency management
   110  	//  - import dependency management
   111  	//  - fill in missing dependency version requirement
   112  	project.ProcessDependencies(func(groupID, artifactID, version maven.String) (maven.DependencyManagement, error) {
   113  		// There is no network access so return an empty list of dependency management.
   114  		return maven.DependencyManagement{}, nil
   115  	})
   116  
   117  	details := map[string]*extractor.Package{}
   118  
   119  	for _, dep := range project.Dependencies {
   120  		g, a, found := strings.Cut(dep.Name(), ":")
   121  		if !found {
   122  			err := fmt.Errorf("invalid package name %q for %s in %s", dep.Name(), project.Name, input.Path)
   123  			log.Errorf(err.Error())
   124  			return inventory.Inventory{}, err
   125  		}
   126  
   127  		depType := ""
   128  		if dep.Type != "jar" {
   129  			depType = string(dep.Type)
   130  		}
   131  
   132  		metadata := javalockfile.Metadata{
   133  			ArtifactID:   a,
   134  			GroupID:      g,
   135  			Type:         depType,
   136  			Classifier:   string(dep.Classifier),
   137  			DepGroupVals: []string{},
   138  		}
   139  		pkgDetails := &extractor.Package{
   140  			Name:      dep.Name(),
   141  			Version:   parseResolvedVersion(dep.Version),
   142  			PURLType:  purl.TypeMaven,
   143  			Locations: []string{input.Path},
   144  			Metadata:  &metadata,
   145  		}
   146  		if scope := strings.TrimSpace(string(dep.Scope)); scope != "" && scope != "compile" {
   147  			// Only append non-default scope (compile is the default scope).
   148  			metadata.DepGroupVals = []string{scope}
   149  		}
   150  		details[dep.Name()] = pkgDetails
   151  	}
   152  
   153  	return inventory.Inventory{Packages: slices.Collect(maps.Values(details))}, nil
   154  }
   155  
// Compile-time check that an Extractor value satisfies filesystem.Extractor.
var _ filesystem.Extractor = Extractor{}