github.com/anchore/syft@v1.38.2/syft/pkg/cataloger/python/license.go (about)

     1  package python
     2  
     3  import (
     4  	"context"
     5  	"encoding/json"
     6  	"fmt"
     7  	"io"
     8  	"net/http"
     9  	"net/url"
    10  	"strings"
    11  	"time"
    12  
    13  	"github.com/anchore/syft/internal/cache"
    14  	"github.com/anchore/syft/internal/log"
    15  	"github.com/anchore/syft/syft/pkg"
    16  )
    17  
    18  type pythonLicenseResolver struct {
    19  	catalogerConfig CatalogerConfig
    20  	licenseCache    cache.Resolver[[]pkg.License]
    21  }
    22  
    23  func newPythonLicenseResolver(config CatalogerConfig) pythonLicenseResolver {
    24  	return pythonLicenseResolver{
    25  		licenseCache:    cache.GetResolverCachingErrors[[]pkg.License]("python", "v1"),
    26  		catalogerConfig: config,
    27  	}
    28  }
    29  
    30  func (lr *pythonLicenseResolver) getLicenses(ctx context.Context, packageName string, packageVersion string) pkg.LicenseSet {
    31  	var licenseSet pkg.LicenseSet
    32  
    33  	if lr.catalogerConfig.SearchRemoteLicenses {
    34  		licenses, err := lr.getLicensesFromRemote(ctx, packageName, packageVersion)
    35  		if err == nil && licenses != nil {
    36  			licenseSet = pkg.NewLicenseSet(licenses...)
    37  		}
    38  		if err != nil {
    39  			log.Debugf("unable to extract licenses from pypi registry for package %s:%s: %+v", packageName, packageVersion, err)
    40  		}
    41  	}
    42  	return licenseSet
    43  }
    44  
    45  func (lr *pythonLicenseResolver) getLicensesFromRemote(ctx context.Context, packageName string, packageVersion string) ([]pkg.License, error) {
    46  	return lr.licenseCache.Resolve(fmt.Sprintf("%s/%s", packageName, packageVersion), func() ([]pkg.License, error) {
    47  		license, err := getLicenseFromPypiRegistry(lr.catalogerConfig.PypiBaseURL, packageName, packageVersion)
    48  		if err == nil && license != "" {
    49  			licenses := pkg.NewLicensesFromValuesWithContext(ctx, license)
    50  			return licenses, nil
    51  		}
    52  		if err != nil {
    53  			log.Debugf("unable to extract licenses from pypi registry for package %s:%s: %+v", packageName, packageVersion, err)
    54  		}
    55  		return nil, err
    56  	})
    57  }
    58  
    59  func formatPypiRegistryURL(baseURL, packageName, version string) (requestURL string, err error) {
    60  	if packageName == "" {
    61  		return "", fmt.Errorf("unable to format pypi request for a blank package name")
    62  	}
    63  
    64  	urlPath := []string{packageName, version, "json"}
    65  	requestURL, err = url.JoinPath(baseURL, urlPath...)
    66  	if err != nil {
    67  		return requestURL, fmt.Errorf("unable to format pypi request for pkg:version %s%s; %w", packageName, version, err)
    68  	}
    69  	return requestURL, nil
    70  }
    71  
    72  func getLicenseFromPypiRegistry(baseURL, packageName, version string) (string, error) {
    73  	// "https://pypi.org/pypi/%s/%s/json", packageName, version
    74  	requestURL, err := formatPypiRegistryURL(baseURL, packageName, version)
    75  	if err != nil {
    76  		return "", fmt.Errorf("unable to format pypi request for pkg:version %s%s; %w", packageName, version, err)
    77  	}
    78  	log.WithFields("url", requestURL).Info("downloading python package from pypi")
    79  
    80  	pypiRequest, err := http.NewRequest(http.MethodGet, requestURL, nil)
    81  	if err != nil {
    82  		return "", fmt.Errorf("unable to format remote request: %w", err)
    83  	}
    84  
    85  	httpClient := &http.Client{
    86  		Timeout: time.Second * 10,
    87  	}
    88  
    89  	resp, err := httpClient.Do(pypiRequest)
    90  	if err != nil {
    91  		return "", fmt.Errorf("unable to get package from pypi registry: %w", err)
    92  	}
    93  	defer func() {
    94  		if err := resp.Body.Close(); err != nil {
    95  			log.Errorf("unable to close body: %+v", err)
    96  		}
    97  	}()
    98  
    99  	if resp.StatusCode != 200 {
   100  		return "", fmt.Errorf("unable to get package from pypi registry")
   101  	}
   102  
   103  	bytes, err := io.ReadAll(resp.Body)
   104  	if err != nil {
   105  		return "", fmt.Errorf("unable to parse package from pypi registry: %w", err)
   106  	}
   107  
   108  	dec := json.NewDecoder(strings.NewReader(string(bytes)))
   109  
   110  	// Read "license" from the response
   111  	var pypiResponse struct {
   112  		Info struct {
   113  			License           string `json:"license"`
   114  			LicenseExpression string `json:"license_expression"`
   115  		} `json:"info"`
   116  	}
   117  
   118  	if err := dec.Decode(&pypiResponse); err != nil {
   119  		return "", fmt.Errorf("unable to parse license from pypi registry: %w", err)
   120  	}
   121  
   122  	var license string
   123  	if pypiResponse.Info.LicenseExpression != "" {
   124  		license = pypiResponse.Info.LicenseExpression
   125  	} else {
   126  		license = pypiResponse.Info.License
   127  	}
   128  	log.Tracef("Retrieved License: %s", license)
   129  
   130  	return license, nil
   131  }