github.com/google/osv-scalibr@v0.4.1/extractor/filesystem/language/java/pomxml/pomxml.go (about) 1 // Copyright 2025 Google LLC 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 // Package pomxml extracts pom.xml files. 16 package pomxml 17 18 import ( 19 "context" 20 "encoding/xml" 21 "fmt" 22 "maps" 23 "path/filepath" 24 "regexp" 25 "slices" 26 "strings" 27 28 "deps.dev/util/maven" 29 30 "github.com/google/osv-scalibr/extractor" 31 "github.com/google/osv-scalibr/extractor/filesystem" 32 "github.com/google/osv-scalibr/extractor/filesystem/language/java/javalockfile" 33 "github.com/google/osv-scalibr/internal/mavenutil" 34 "github.com/google/osv-scalibr/inventory" 35 "github.com/google/osv-scalibr/log" 36 "github.com/google/osv-scalibr/plugin" 37 "github.com/google/osv-scalibr/purl" 38 ) 39 40 const ( 41 // Name is the unique name of this extractor. 42 Name = "java/pomxml" 43 ) 44 45 // "Constant" at the top to compile this regex only once. 46 var ( 47 versionRequirementReg = regexp.MustCompile(`[[(]?(.*?)(?:,|[)\]]|$)`) 48 ) 49 50 func parseResolvedVersion(version maven.String) string { 51 results := versionRequirementReg.FindStringSubmatch(string(version)) 52 // First capture group will always exist, but might be empty, therefore the slice will always 53 // have a length of 2. 54 if results == nil || results[1] == "" { 55 return "" 56 } 57 58 return results[1] 59 } 60 61 // Extractor extracts Maven packages from pom.xml files. 62 type Extractor struct{} 63 64 // New returns a new instance of the extractor. 65 func New() filesystem.Extractor { return &Extractor{} } 66 67 // Name of the extractor 68 func (e Extractor) Name() string { return Name } 69 70 // Version of the extractor 71 func (e Extractor) Version() int { return 0 } 72 73 // Requirements of the extractor 74 func (e Extractor) Requirements() *plugin.Capabilities { 75 return &plugin.Capabilities{Network: plugin.NetworkOffline} 76 } 77 78 // FileRequired returns true if the specified file matches Maven POM lockfile patterns. 79 func (e Extractor) FileRequired(api filesystem.FileAPI) bool { 80 return filepath.Base(api.Path()) == "pom.xml" || filepath.Ext(api.Path()) == ".pom" 81 } 82 83 // Extract extracts packages from pom.xml files passed through the scan input. 84 func (e Extractor) Extract(ctx context.Context, input *filesystem.ScanInput) (inventory.Inventory, error) { 85 var project *maven.Project 86 87 if err := xml.NewDecoder(input.Reader).Decode(&project); err != nil { 88 err := fmt.Errorf("could not extract pom from %s: %w", input.Path, err) 89 log.Errorf(err.Error()) 90 return inventory.Inventory{}, err 91 } 92 if err := project.Interpolate(); err != nil { 93 err := fmt.Errorf("failed to interpolate pom for %s in %s: %w", project.Name, input.Path, err) 94 log.Errorf(err.Error()) 95 return inventory.Inventory{}, err 96 } 97 98 // Merging parents data by parsing local parent pom.xml. 99 if err := mavenutil.MergeParents(ctx, project.Parent, project, mavenutil.Options{ 100 Input: input, 101 AllowLocal: true, 102 InitialParentIndex: 1, 103 }); err != nil { 104 err := fmt.Errorf("failed to merge parents for %s in %s: %w", project.Name, input.Path, err) 105 log.Errorf(err.Error()) 106 return inventory.Inventory{}, err 107 } 108 // Process the dependencies: 109 // - dedupe dependencies and dependency management 110 // - import dependency management 111 // - fill in missing dependency version requirement 112 project.ProcessDependencies(func(groupID, artifactID, version maven.String) (maven.DependencyManagement, error) { 113 // There is no network access so return an empty list of dependency management. 114 return maven.DependencyManagement{}, nil 115 }) 116 117 details := map[string]*extractor.Package{} 118 119 for _, dep := range project.Dependencies { 120 g, a, found := strings.Cut(dep.Name(), ":") 121 if !found { 122 err := fmt.Errorf("invalid package name %q for %s in %s", dep.Name(), project.Name, input.Path) 123 log.Errorf(err.Error()) 124 return inventory.Inventory{}, err 125 } 126 127 depType := "" 128 if dep.Type != "jar" { 129 depType = string(dep.Type) 130 } 131 132 metadata := javalockfile.Metadata{ 133 ArtifactID: a, 134 GroupID: g, 135 Type: depType, 136 Classifier: string(dep.Classifier), 137 DepGroupVals: []string{}, 138 } 139 pkgDetails := &extractor.Package{ 140 Name: dep.Name(), 141 Version: parseResolvedVersion(dep.Version), 142 PURLType: purl.TypeMaven, 143 Locations: []string{input.Path}, 144 Metadata: &metadata, 145 } 146 if scope := strings.TrimSpace(string(dep.Scope)); scope != "" && scope != "compile" { 147 // Only append non-default scope (compile is the default scope). 148 metadata.DepGroupVals = []string{scope} 149 } 150 details[dep.Name()] = pkgDetails 151 } 152 153 return inventory.Inventory{Packages: slices.Collect(maps.Values(details))}, nil 154 } 155 156 var _ filesystem.Extractor = Extractor{}