github.com/anchore/syft@v1.4.2-0.20240516191711-1bec1fc5d397/syft/pkg/cataloger/java/maven_repo_utils.go (about)

     1  package java
     2  
     3  import (
     4  	"context"
     5  	"fmt"
     6  	"io"
     7  	"net/http"
     8  	"net/url"
     9  	"strings"
    10  	"time"
    11  
    12  	"github.com/vifraa/gopom"
    13  
    14  	"github.com/anchore/syft/internal/log"
    15  )
    16  
    17  func formatMavenPomURL(groupID, artifactID, version, mavenBaseURL string) (requestURL string, err error) {
    18  	// groupID needs to go from maven.org -> maven/org
    19  	urlPath := strings.Split(groupID, ".")
    20  	artifactPom := fmt.Sprintf("%s-%s.pom", artifactID, version)
    21  	urlPath = append(urlPath, artifactID, version, artifactPom)
    22  
    23  	// ex:"https://repo1.maven.org/maven2/groupID/artifactID/artifactPom
    24  	requestURL, err = url.JoinPath(mavenBaseURL, urlPath...)
    25  	if err != nil {
    26  		return requestURL, fmt.Errorf("could not construct maven url: %w", err)
    27  	}
    28  	return requestURL, err
    29  }
    30  
    31  // An artifact can have its version defined in a parent's DependencyManagement section
    32  func recursivelyFindVersionFromParentPom(ctx context.Context, groupID, artifactID, parentGroupID, parentArtifactID, parentVersion string, cfg ArchiveCatalogerConfig) string {
    33  	// As there can be nested parent poms, we'll recursively check for the version until we reach the max depth
    34  	for i := 0; i < cfg.MaxParentRecursiveDepth; i++ {
    35  		parentPom, err := getPomFromMavenRepo(ctx, parentGroupID, parentArtifactID, parentVersion, cfg.MavenBaseURL)
    36  		if err != nil {
    37  			// We don't want to abort here as the parent pom might not exist in Maven Central, we'll just log the error
    38  			log.Tracef("unable to get parent pom from Maven central: %v", err)
    39  			break
    40  		}
    41  		if parentPom != nil && parentPom.DependencyManagement != nil {
    42  			for _, dependency := range *parentPom.DependencyManagement.Dependencies {
    43  				if groupID == *dependency.GroupID && artifactID == *dependency.ArtifactID && dependency.Version != nil {
    44  					return *dependency.Version
    45  				}
    46  			}
    47  		}
    48  		if parentPom == nil || parentPom.Parent == nil {
    49  			break
    50  		}
    51  		parentGroupID = *parentPom.Parent.GroupID
    52  		parentArtifactID = *parentPom.Parent.ArtifactID
    53  		parentVersion = *parentPom.Parent.Version
    54  	}
    55  	return ""
    56  }
    57  
    58  func recursivelyFindLicensesFromParentPom(ctx context.Context, groupID, artifactID, version string, cfg ArchiveCatalogerConfig) []string {
    59  	var licenses []string
    60  	// As there can be nested parent poms, we'll recursively check for licenses until we reach the max depth
    61  	for i := 0; i < cfg.MaxParentRecursiveDepth; i++ {
    62  		parentPom, err := getPomFromMavenRepo(ctx, groupID, artifactID, version, cfg.MavenBaseURL)
    63  		if err != nil {
    64  			// We don't want to abort here as the parent pom might not exist in Maven Central, we'll just log the error
    65  			log.Tracef("unable to get parent pom from Maven central: %v", err)
    66  			return []string{}
    67  		}
    68  		parentLicenses := parseLicensesFromPom(parentPom)
    69  		if len(parentLicenses) > 0 || parentPom == nil || parentPom.Parent == nil {
    70  			licenses = parentLicenses
    71  			break
    72  		}
    73  
    74  		groupID = *parentPom.Parent.GroupID
    75  		artifactID = *parentPom.Parent.ArtifactID
    76  		version = *parentPom.Parent.Version
    77  	}
    78  
    79  	return licenses
    80  }
    81  
    82  func getPomFromMavenRepo(ctx context.Context, groupID, artifactID, version, mavenBaseURL string) (*gopom.Project, error) {
    83  	requestURL, err := formatMavenPomURL(groupID, artifactID, version, mavenBaseURL)
    84  	if err != nil {
    85  		return nil, err
    86  	}
    87  	log.Tracef("trying to fetch parent pom from Maven central %s", requestURL)
    88  
    89  	mavenRequest, err := http.NewRequest(http.MethodGet, requestURL, nil)
    90  	if err != nil {
    91  		return nil, fmt.Errorf("unable to format request for Maven central: %w", err)
    92  	}
    93  
    94  	httpClient := &http.Client{
    95  		Timeout: time.Second * 10,
    96  	}
    97  
    98  	mavenRequest = mavenRequest.WithContext(ctx)
    99  
   100  	resp, err := httpClient.Do(mavenRequest)
   101  	if err != nil {
   102  		return nil, fmt.Errorf("unable to get pom from Maven central: %w", err)
   103  	}
   104  	defer func() {
   105  		if err := resp.Body.Close(); err != nil {
   106  			log.Errorf("unable to close body: %+v", err)
   107  		}
   108  	}()
   109  
   110  	bytes, err := io.ReadAll(resp.Body)
   111  	if err != nil {
   112  		return nil, fmt.Errorf("unable to parse pom from Maven central: %w", err)
   113  	}
   114  
   115  	pom, err := decodePomXML(strings.NewReader(string(bytes)))
   116  	if err != nil {
   117  		return nil, fmt.Errorf("unable to parse pom from Maven central: %w", err)
   118  	}
   119  
   120  	return &pom, nil
   121  }
   122  
   123  func parseLicensesFromPom(pom *gopom.Project) []string {
   124  	var licenses []string
   125  	if pom != nil && pom.Licenses != nil {
   126  		for _, license := range *pom.Licenses {
   127  			if license.Name != nil {
   128  				licenses = append(licenses, *license.Name)
   129  			} else if license.URL != nil {
   130  				licenses = append(licenses, *license.URL)
   131  			}
   132  		}
   133  	}
   134  
   135  	return licenses
   136  }