// Source: github.com/google/osv-scalibr@v0.4.1/internal/mavenutil/maven.go

// Copyright 2025 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// Package mavenutil provides utilities for merging Maven pom.xml.
package mavenutil

import (
	"context"
	"errors"
	"fmt"
	"path/filepath"
	"strings"

	"deps.dev/util/maven"
	"deps.dev/util/resolve"
	"deps.dev/util/semver"
	"github.com/google/osv-scalibr/clients/datasource"
	"github.com/google/osv-scalibr/extractor/filesystem"
)

// Origin of the dependencies.
const (
	OriginManagement = "management"
	OriginParent     = "parent"
	OriginPlugin     = "plugin"
	OriginProfile    = "profile"
)

// MaxParent sets a limit on the number of parents that are followed, to avoid
// looping indefinitely.
const MaxParent = 100

// Options for merging parent data.
//   - Input is the scan input for the current project.
//   - Client is the Maven registry API client for fetching remote pom.xml.
//   - AddRegistry indicates whether the repositories declared in a merged
//     parent project are added to Client as registries.
//   - AllowLocal indicates whether parsing local parent pom.xml is allowed.
//   - InitialParentIndex indicates the index of the current parent project, which is
//     used to check if the packaging has to be `pom`.
49 type Options struct { 50 Input *filesystem.ScanInput 51 Client *datasource.MavenRegistryAPIClient 52 53 AddRegistry bool 54 AllowLocal bool 55 InitialParentIndex int 56 } 57 58 // MergeParents parses local accessible parent pom.xml or fetches it from 59 // upstream, merges into root project, then interpolate the properties. 60 // - current holds the current parent project to merge. 61 // - result holds the Maven project to merge into, this is modified in place. 62 // - opts holds the options for merging parent data. 63 func MergeParents(ctx context.Context, current maven.Parent, result *maven.Project, opts Options) error { 64 currentPath := "" 65 if opts.Input != nil { 66 currentPath = opts.Input.Path 67 } 68 69 allowLocal := opts.AllowLocal 70 visited := make(map[maven.ProjectKey]struct{}, MaxParent) 71 for n := opts.InitialParentIndex; n < MaxParent; n++ { 72 if current.GroupID == "" || current.ArtifactID == "" || current.Version == "" { 73 break 74 } 75 if _, ok := visited[current.ProjectKey]; ok { 76 // A cycle of parents is detected 77 return errors.New("a cycle of parents is detected") 78 } 79 visited[current.ProjectKey] = struct{}{} 80 81 var proj maven.Project 82 parentFoundLocally := false 83 if allowLocal { 84 var parentPath string 85 var err error 86 parentFoundLocally, parentPath, err = loadParentLocal(opts.Input, current, currentPath, &proj) 87 if err != nil { 88 return fmt.Errorf("failed to load parent at %s: %w", currentPath, err) 89 } 90 if parentPath != "" { 91 currentPath = parentPath 92 } 93 } 94 if !parentFoundLocally { 95 // Once we fetch a parent pom.xml from upstream, we should not 96 // allow parsing parent pom.xml locally anymore. 97 allowLocal = false 98 var err error 99 proj, err = loadParentRemote(ctx, opts.Client, current, n) 100 if err != nil { 101 return fmt.Errorf("failed to load parent from remote: %w", err) 102 } 103 } 104 // Use an empty JDK string and ActivationOS here to merge the default profiles. 
105 if err := result.MergeProfiles("", maven.ActivationOS{}); err != nil { 106 return fmt.Errorf("failed to merge default profiles: %w", err) 107 } 108 if opts.Client != nil && opts.AddRegistry && len(proj.Repositories) > 0 { 109 for _, repo := range proj.Repositories { 110 if err := opts.Client.AddRegistry(ctx, datasource.MavenRegistry{ 111 URL: string(repo.URL), 112 ID: string(repo.ID), 113 ReleasesEnabled: repo.Releases.Enabled.Boolean(), 114 SnapshotsEnabled: repo.Snapshots.Enabled.Boolean(), 115 }); err != nil { 116 return fmt.Errorf("failed to add registry %s: %w", repo.URL, err) 117 } 118 } 119 } 120 result.MergeParent(proj) 121 current = proj.Parent 122 } 123 // Interpolate the project to resolve the properties. 124 return result.Interpolate() 125 } 126 127 // loadParentLocal loads a parent Maven project from local file system 128 // and returns whether parent is found locally as well as parent path. 129 func loadParentLocal(input *filesystem.ScanInput, parent maven.Parent, path string, result *maven.Project) (bool, string, error) { 130 parentPath := ParentPOMPath(input, path, string(parent.RelativePath)) 131 if parentPath == "" { 132 return false, "", nil 133 } 134 f, err := input.FS.Open(parentPath) 135 if err != nil { 136 return false, "", fmt.Errorf("failed to open parent file %s: %w", parentPath, err) 137 } 138 err = datasource.NewMavenDecoder(f).Decode(result) 139 if closeErr := f.Close(); closeErr != nil { 140 return false, "", fmt.Errorf("failed to close file: %w", err) 141 } 142 if err != nil { 143 return false, "", fmt.Errorf("failed to unmarshal project: %w", err) 144 } 145 if ProjectKey(*result) != parent.ProjectKey || result.Packaging != "pom" { 146 // Only mark parent as found when the identifiers and packaging are expected. 147 return false, "", nil 148 } 149 return true, parentPath, nil 150 } 151 152 // loadParentRemote loads a parent from remote registry. 
153 func loadParentRemote(ctx context.Context, mavenClient *datasource.MavenRegistryAPIClient, parent maven.Parent, parentIndex int) (maven.Project, error) { 154 if mavenClient == nil { 155 // The client is not available, so return an empty project. 156 return maven.Project{}, nil 157 } 158 159 proj, err := mavenClient.GetProject(ctx, string(parent.GroupID), string(parent.ArtifactID), string(parent.Version)) 160 if err != nil { 161 return maven.Project{}, fmt.Errorf("failed to get Maven project %s:%s:%s: %w", parent.GroupID, parent.ArtifactID, parent.Version, err) 162 } 163 if parentIndex > 0 && proj.Packaging != "pom" { 164 // A parent project should only be of "pom" packaging type. 165 return maven.Project{}, fmt.Errorf("invalid packaging for parent project %s", proj.Packaging) 166 } 167 if ProjectKey(proj) != parent.ProjectKey { 168 // The identifiers in parent does not match what we want. 169 return maven.Project{}, fmt.Errorf("parent identifiers mismatch: %v, expect %v", proj.ProjectKey, parent.ProjectKey) 170 } 171 return proj, nil 172 } 173 174 // ProjectKey returns a project key with empty groupId/version 175 // filled by corresponding fields in parent. 176 func ProjectKey(proj maven.Project) maven.ProjectKey { 177 if proj.GroupID == "" { 178 proj.GroupID = proj.Parent.GroupID 179 } 180 if proj.Version == "" { 181 proj.Version = proj.Parent.Version 182 } 183 184 return proj.ProjectKey 185 } 186 187 // ParentPOMPath returns the path of a parent pom.xml. 188 // Maven looks for the parent POM first in 'relativePath', then 189 // the local repository '../pom.xml', and lastly in the remote repo. 190 // An empty string is returned if failed to resolve the parent path. 
191 func ParentPOMPath(input *filesystem.ScanInput, currentPath, relativePath string) string { 192 if relativePath == "" { 193 relativePath = "../pom.xml" 194 } 195 196 path := filepath.ToSlash(filepath.Join(filepath.Dir(currentPath), relativePath)) 197 if info, err := input.FS.Stat(path); err == nil { 198 if !info.IsDir() { 199 return path 200 } 201 // Current path is a directory, so look for pom.xml in the directory. 202 path = filepath.ToSlash(filepath.Join(path, "pom.xml")) 203 if _, err := input.FS.Stat(path); err == nil { 204 return path 205 } 206 } 207 208 return "" 209 } 210 211 // GetDependencyManagement returns managed dependencies in the specified Maven project by fetching remote pom.xml. 212 func GetDependencyManagement(ctx context.Context, client *datasource.MavenRegistryAPIClient, groupID, artifactID, version maven.String) (maven.DependencyManagement, error) { 213 root := maven.Parent{ProjectKey: maven.ProjectKey{GroupID: groupID, ArtifactID: artifactID, Version: version}} 214 var result maven.Project 215 // To get dependency management from another project, we need the 216 // project with parents merged, so we call MergeParents by passing 217 // an empty project. 218 if err := MergeParents(ctx, root, &result, Options{ 219 Client: client, 220 AddRegistry: false, 221 AllowLocal: false, 222 InitialParentIndex: 0, 223 }); err != nil { 224 return maven.DependencyManagement{}, err 225 } 226 227 return result.DependencyManagement, nil 228 } 229 230 // CompareVersions compares two Maven semver versions with special behaviour for specific packages, 231 // producing more desirable ordering using non-standard comparison. 232 func CompareVersions(vk resolve.VersionKey, a *semver.Version, b *semver.Version) int { 233 if a == nil || b == nil { 234 if a == nil { 235 return -1 236 } 237 238 return 1 239 } 240 241 if vk.Name == "com.google.guava:guava" { 242 // com.google.guava:guava has 'flavors' with versions ending with -jre or -android. 
243 // https://github.com/google/guava/wiki/ReleasePolicy#flavors 244 // To preserve the flavor in updates, we make the opposite flavor considered the earliest versions. 245 246 // Old versions have '22.0' and '22.0-android', and even older version don't have any flavors. 247 // Only check for the android flavor, and assume its jre otherwise. 248 wantAndroid := strings.HasSuffix(vk.Version, "-android") 249 250 aIsAndroid := strings.HasSuffix(a.String(), "-android") 251 bIsAndroid := strings.HasSuffix(b.String(), "-android") 252 253 if aIsAndroid == bIsAndroid { 254 return a.Compare(b) 255 } 256 257 if aIsAndroid == wantAndroid { 258 return 1 259 } 260 261 return -1 262 } 263 264 // Old versions of apache commons-* libraries (commons-io:commons-io, commons-math:commons-math, etc.) 265 // used date-based versions (e.g. 20040118.003354), which naturally sort after the more recent semver versions. 266 // We manually force the date versions to come before the others to prevent downgrades. 267 if strings.HasPrefix(vk.Name, "commons-") { 268 // All date-based versions of these packages seem to be in the years 2002-2005. 269 // It's extremely unlikely we'd see any versions dated before 1999 or after 2010. 270 // It's also unlikely we'd see any major versions of these packages reach up to 200.0.0. 271 // Checking if the version starts with "200" should therefore be sufficient to determine if it's a year. 272 aCal := strings.HasPrefix(a.String(), "200") 273 bCal := strings.HasPrefix(b.String(), "200") 274 275 if aCal == bCal { 276 return a.Compare(b) 277 } 278 279 if aCal { 280 return -1 281 } 282 283 return 1 284 } 285 286 return a.Compare(b) 287 } 288 289 // IsPrerelease returns whether the given version is a prerelease version. 290 // There is a special handling for com.google.guava:guava, which has 'flavors' with versions ending 291 // with '-jre' or '-android'. These versions are not considered as prerelease versions. 
292 func IsPrerelease(ver *semver.Version, vk resolve.VersionKey) bool { 293 if vk.Name == "com.google.guava:guava" { 294 return false 295 } 296 return ver.IsPrerelease() 297 }