github.com/google/osv-scalibr@v0.4.1/internal/mavenutil/maven.go (about)

     1  // Copyright 2025 Google LLC
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
// Package mavenutil provides utilities for merging Maven pom.xml files.
    16  package mavenutil
    17  
    18  import (
    19  	"context"
    20  	"errors"
    21  	"fmt"
    22  	"path/filepath"
    23  	"strings"
    24  
    25  	"deps.dev/util/maven"
    26  	"deps.dev/util/resolve"
    27  	"deps.dev/util/semver"
    28  	"github.com/google/osv-scalibr/clients/datasource"
    29  	"github.com/google/osv-scalibr/extractor/filesystem"
    30  )
    31  
// Origin of the dependencies.
//
// NOTE(review): these values are not referenced anywhere in this file;
// presumably callers use them to label which part of a pom.xml (dependency
// management, parent, plugin or profile) a dependency was declared in —
// confirm against the call sites.
const (
	OriginManagement = "management"
	OriginParent     = "parent"
	OriginPlugin     = "plugin"
	OriginProfile    = "profile"
)
    39  
// MaxParent sets a limit on the number of parents to avoid an indefinite loop.
// MergeParents stops following the parent chain once its parent index, which
// starts at Options.InitialParentIndex, reaches this value.
const MaxParent = 100
    42  
// Options for merging parent data.
//   - Input is the scan input for the current project. It may be nil, in which
//     case MergeParents starts with an empty current path.
//   - Client is the Maven registry API client for fetching remote pom.xml.
//     When it is nil, a parent that is not found locally is treated as an
//     empty project.
//   - AddRegistry indicates whether the repositories declared in each loaded
//     parent project are added to Client as registries. It has no effect when
//     Client is nil.
//   - AllowLocal indicates whether parsing local parent pom.xml is allowed.
//   - InitialParentIndex indicates the index of the current parent project, which is
//     used to check if the packaging has to be `pom`. A remotely fetched
//     project is only required to have `pom` packaging when its index is
//     greater than zero.
type Options struct {
	Input  *filesystem.ScanInput
	Client *datasource.MavenRegistryAPIClient

	AddRegistry        bool
	AllowLocal         bool
	InitialParentIndex int
}
    57  
    58  // MergeParents parses local accessible parent pom.xml or fetches it from
    59  // upstream, merges into root project, then interpolate the properties.
    60  //   - current holds the current parent project to merge.
    61  //   - result holds the Maven project to merge into, this is modified in place.
    62  //   - opts holds the options for merging parent data.
    63  func MergeParents(ctx context.Context, current maven.Parent, result *maven.Project, opts Options) error {
    64  	currentPath := ""
    65  	if opts.Input != nil {
    66  		currentPath = opts.Input.Path
    67  	}
    68  
    69  	allowLocal := opts.AllowLocal
    70  	visited := make(map[maven.ProjectKey]struct{}, MaxParent)
    71  	for n := opts.InitialParentIndex; n < MaxParent; n++ {
    72  		if current.GroupID == "" || current.ArtifactID == "" || current.Version == "" {
    73  			break
    74  		}
    75  		if _, ok := visited[current.ProjectKey]; ok {
    76  			// A cycle of parents is detected
    77  			return errors.New("a cycle of parents is detected")
    78  		}
    79  		visited[current.ProjectKey] = struct{}{}
    80  
    81  		var proj maven.Project
    82  		parentFoundLocally := false
    83  		if allowLocal {
    84  			var parentPath string
    85  			var err error
    86  			parentFoundLocally, parentPath, err = loadParentLocal(opts.Input, current, currentPath, &proj)
    87  			if err != nil {
    88  				return fmt.Errorf("failed to load parent at %s: %w", currentPath, err)
    89  			}
    90  			if parentPath != "" {
    91  				currentPath = parentPath
    92  			}
    93  		}
    94  		if !parentFoundLocally {
    95  			// Once we fetch a parent pom.xml from upstream, we should not
    96  			// allow parsing parent pom.xml locally anymore.
    97  			allowLocal = false
    98  			var err error
    99  			proj, err = loadParentRemote(ctx, opts.Client, current, n)
   100  			if err != nil {
   101  				return fmt.Errorf("failed to load parent from remote: %w", err)
   102  			}
   103  		}
   104  		// Use an empty JDK string and ActivationOS here to merge the default profiles.
   105  		if err := result.MergeProfiles("", maven.ActivationOS{}); err != nil {
   106  			return fmt.Errorf("failed to merge default profiles: %w", err)
   107  		}
   108  		if opts.Client != nil && opts.AddRegistry && len(proj.Repositories) > 0 {
   109  			for _, repo := range proj.Repositories {
   110  				if err := opts.Client.AddRegistry(ctx, datasource.MavenRegistry{
   111  					URL:              string(repo.URL),
   112  					ID:               string(repo.ID),
   113  					ReleasesEnabled:  repo.Releases.Enabled.Boolean(),
   114  					SnapshotsEnabled: repo.Snapshots.Enabled.Boolean(),
   115  				}); err != nil {
   116  					return fmt.Errorf("failed to add registry %s: %w", repo.URL, err)
   117  				}
   118  			}
   119  		}
   120  		result.MergeParent(proj)
   121  		current = proj.Parent
   122  	}
   123  	// Interpolate the project to resolve the properties.
   124  	return result.Interpolate()
   125  }
   126  
   127  // loadParentLocal loads a parent Maven project from local file system
   128  // and returns whether parent is found locally as well as parent path.
   129  func loadParentLocal(input *filesystem.ScanInput, parent maven.Parent, path string, result *maven.Project) (bool, string, error) {
   130  	parentPath := ParentPOMPath(input, path, string(parent.RelativePath))
   131  	if parentPath == "" {
   132  		return false, "", nil
   133  	}
   134  	f, err := input.FS.Open(parentPath)
   135  	if err != nil {
   136  		return false, "", fmt.Errorf("failed to open parent file %s: %w", parentPath, err)
   137  	}
   138  	err = datasource.NewMavenDecoder(f).Decode(result)
   139  	if closeErr := f.Close(); closeErr != nil {
   140  		return false, "", fmt.Errorf("failed to close file: %w", err)
   141  	}
   142  	if err != nil {
   143  		return false, "", fmt.Errorf("failed to unmarshal project: %w", err)
   144  	}
   145  	if ProjectKey(*result) != parent.ProjectKey || result.Packaging != "pom" {
   146  		// Only mark parent as found when the identifiers and packaging are expected.
   147  		return false, "", nil
   148  	}
   149  	return true, parentPath, nil
   150  }
   151  
   152  // loadParentRemote loads a parent from remote registry.
   153  func loadParentRemote(ctx context.Context, mavenClient *datasource.MavenRegistryAPIClient, parent maven.Parent, parentIndex int) (maven.Project, error) {
   154  	if mavenClient == nil {
   155  		// The client is not available, so return an empty project.
   156  		return maven.Project{}, nil
   157  	}
   158  
   159  	proj, err := mavenClient.GetProject(ctx, string(parent.GroupID), string(parent.ArtifactID), string(parent.Version))
   160  	if err != nil {
   161  		return maven.Project{}, fmt.Errorf("failed to get Maven project %s:%s:%s: %w", parent.GroupID, parent.ArtifactID, parent.Version, err)
   162  	}
   163  	if parentIndex > 0 && proj.Packaging != "pom" {
   164  		// A parent project should only be of "pom" packaging type.
   165  		return maven.Project{}, fmt.Errorf("invalid packaging for parent project %s", proj.Packaging)
   166  	}
   167  	if ProjectKey(proj) != parent.ProjectKey {
   168  		// The identifiers in parent does not match what we want.
   169  		return maven.Project{}, fmt.Errorf("parent identifiers mismatch: %v, expect %v", proj.ProjectKey, parent.ProjectKey)
   170  	}
   171  	return proj, nil
   172  }
   173  
   174  // ProjectKey returns a project key with empty groupId/version
   175  // filled by corresponding fields in parent.
   176  func ProjectKey(proj maven.Project) maven.ProjectKey {
   177  	if proj.GroupID == "" {
   178  		proj.GroupID = proj.Parent.GroupID
   179  	}
   180  	if proj.Version == "" {
   181  		proj.Version = proj.Parent.Version
   182  	}
   183  
   184  	return proj.ProjectKey
   185  }
   186  
   187  // ParentPOMPath returns the path of a parent pom.xml.
   188  // Maven looks for the parent POM first in 'relativePath', then
   189  // the local repository '../pom.xml', and lastly in the remote repo.
   190  // An empty string is returned if failed to resolve the parent path.
   191  func ParentPOMPath(input *filesystem.ScanInput, currentPath, relativePath string) string {
   192  	if relativePath == "" {
   193  		relativePath = "../pom.xml"
   194  	}
   195  
   196  	path := filepath.ToSlash(filepath.Join(filepath.Dir(currentPath), relativePath))
   197  	if info, err := input.FS.Stat(path); err == nil {
   198  		if !info.IsDir() {
   199  			return path
   200  		}
   201  		// Current path is a directory, so look for pom.xml in the directory.
   202  		path = filepath.ToSlash(filepath.Join(path, "pom.xml"))
   203  		if _, err := input.FS.Stat(path); err == nil {
   204  			return path
   205  		}
   206  	}
   207  
   208  	return ""
   209  }
   210  
   211  // GetDependencyManagement returns managed dependencies in the specified Maven project by fetching remote pom.xml.
   212  func GetDependencyManagement(ctx context.Context, client *datasource.MavenRegistryAPIClient, groupID, artifactID, version maven.String) (maven.DependencyManagement, error) {
   213  	root := maven.Parent{ProjectKey: maven.ProjectKey{GroupID: groupID, ArtifactID: artifactID, Version: version}}
   214  	var result maven.Project
   215  	// To get dependency management from another project, we need the
   216  	// project with parents merged, so we call MergeParents by passing
   217  	// an empty project.
   218  	if err := MergeParents(ctx, root, &result, Options{
   219  		Client:             client,
   220  		AddRegistry:        false,
   221  		AllowLocal:         false,
   222  		InitialParentIndex: 0,
   223  	}); err != nil {
   224  		return maven.DependencyManagement{}, err
   225  	}
   226  
   227  	return result.DependencyManagement, nil
   228  }
   229  
   230  // CompareVersions compares two Maven semver versions with special behaviour for specific packages,
   231  // producing more desirable ordering using non-standard comparison.
   232  func CompareVersions(vk resolve.VersionKey, a *semver.Version, b *semver.Version) int {
   233  	if a == nil || b == nil {
   234  		if a == nil {
   235  			return -1
   236  		}
   237  
   238  		return 1
   239  	}
   240  
   241  	if vk.Name == "com.google.guava:guava" {
   242  		// com.google.guava:guava has 'flavors' with versions ending with -jre or -android.
   243  		// https://github.com/google/guava/wiki/ReleasePolicy#flavors
   244  		// To preserve the flavor in updates, we make the opposite flavor considered the earliest versions.
   245  
   246  		// Old versions have '22.0' and '22.0-android', and even older version don't have any flavors.
   247  		// Only check for the android flavor, and assume its jre otherwise.
   248  		wantAndroid := strings.HasSuffix(vk.Version, "-android")
   249  
   250  		aIsAndroid := strings.HasSuffix(a.String(), "-android")
   251  		bIsAndroid := strings.HasSuffix(b.String(), "-android")
   252  
   253  		if aIsAndroid == bIsAndroid {
   254  			return a.Compare(b)
   255  		}
   256  
   257  		if aIsAndroid == wantAndroid {
   258  			return 1
   259  		}
   260  
   261  		return -1
   262  	}
   263  
   264  	// Old versions of apache commons-* libraries (commons-io:commons-io, commons-math:commons-math, etc.)
   265  	// used date-based versions (e.g. 20040118.003354), which naturally sort after the more recent semver versions.
   266  	// We manually force the date versions to come before the others to prevent downgrades.
   267  	if strings.HasPrefix(vk.Name, "commons-") {
   268  		// All date-based versions of these packages seem to be in the years 2002-2005.
   269  		// It's extremely unlikely we'd see any versions dated before 1999 or after 2010.
   270  		// It's also unlikely we'd see any major versions of these packages reach up to 200.0.0.
   271  		// Checking if the version starts with "200" should therefore be sufficient to determine if it's a year.
   272  		aCal := strings.HasPrefix(a.String(), "200")
   273  		bCal := strings.HasPrefix(b.String(), "200")
   274  
   275  		if aCal == bCal {
   276  			return a.Compare(b)
   277  		}
   278  
   279  		if aCal {
   280  			return -1
   281  		}
   282  
   283  		return 1
   284  	}
   285  
   286  	return a.Compare(b)
   287  }
   288  
   289  // IsPrerelease returns whether the given version is a prerelease version.
   290  // There is a special handling for com.google.guava:guava, which has 'flavors' with versions ending
   291  // with '-jre' or '-android'. These versions are not considered as prerelease versions.
   292  func IsPrerelease(ver *semver.Version, vk resolve.VersionKey) bool {
   293  	if vk.Name == "com.google.guava:guava" {
   294  		return false
   295  	}
   296  	return ver.IsPrerelease()
   297  }