github.com/anchore/syft@v1.38.2/syft/pkg/cataloger/python/license.go (about) 1 package python 2 3 import ( 4 "context" 5 "encoding/json" 6 "fmt" 7 "io" 8 "net/http" 9 "net/url" 10 "strings" 11 "time" 12 13 "github.com/anchore/syft/internal/cache" 14 "github.com/anchore/syft/internal/log" 15 "github.com/anchore/syft/syft/pkg" 16 ) 17 18 type pythonLicenseResolver struct { 19 catalogerConfig CatalogerConfig 20 licenseCache cache.Resolver[[]pkg.License] 21 } 22 23 func newPythonLicenseResolver(config CatalogerConfig) pythonLicenseResolver { 24 return pythonLicenseResolver{ 25 licenseCache: cache.GetResolverCachingErrors[[]pkg.License]("python", "v1"), 26 catalogerConfig: config, 27 } 28 } 29 30 func (lr *pythonLicenseResolver) getLicenses(ctx context.Context, packageName string, packageVersion string) pkg.LicenseSet { 31 var licenseSet pkg.LicenseSet 32 33 if lr.catalogerConfig.SearchRemoteLicenses { 34 licenses, err := lr.getLicensesFromRemote(ctx, packageName, packageVersion) 35 if err == nil && licenses != nil { 36 licenseSet = pkg.NewLicenseSet(licenses...) 37 } 38 if err != nil { 39 log.Debugf("unable to extract licenses from pypi registry for package %s:%s: %+v", packageName, packageVersion, err) 40 } 41 } 42 return licenseSet 43 } 44 45 func (lr *pythonLicenseResolver) getLicensesFromRemote(ctx context.Context, packageName string, packageVersion string) ([]pkg.License, error) { 46 return lr.licenseCache.Resolve(fmt.Sprintf("%s/%s", packageName, packageVersion), func() ([]pkg.License, error) { 47 license, err := getLicenseFromPypiRegistry(lr.catalogerConfig.PypiBaseURL, packageName, packageVersion) 48 if err == nil && license != "" { 49 licenses := pkg.NewLicensesFromValuesWithContext(ctx, license) 50 return licenses, nil 51 } 52 if err != nil { 53 log.Debugf("unable to extract licenses from pypi registry for package %s:%s: %+v", packageName, packageVersion, err) 54 } 55 return nil, err 56 }) 57 } 58 59 func formatPypiRegistryURL(baseURL, packageName, version string) (requestURL string, err error) { 60 if packageName == "" { 61 return "", fmt.Errorf("unable to format pypi request for a blank package name") 62 } 63 64 urlPath := []string{packageName, version, "json"} 65 requestURL, err = url.JoinPath(baseURL, urlPath...) 66 if err != nil { 67 return requestURL, fmt.Errorf("unable to format pypi request for pkg:version %s%s; %w", packageName, version, err) 68 } 69 return requestURL, nil 70 } 71 72 func getLicenseFromPypiRegistry(baseURL, packageName, version string) (string, error) { 73 // "https://pypi.org/pypi/%s/%s/json", packageName, version 74 requestURL, err := formatPypiRegistryURL(baseURL, packageName, version) 75 if err != nil { 76 return "", fmt.Errorf("unable to format pypi request for pkg:version %s%s; %w", packageName, version, err) 77 } 78 log.WithFields("url", requestURL).Info("downloading python package from pypi") 79 80 pypiRequest, err := http.NewRequest(http.MethodGet, requestURL, nil) 81 if err != nil { 82 return "", fmt.Errorf("unable to format remote request: %w", err) 83 } 84 85 httpClient := &http.Client{ 86 Timeout: time.Second * 10, 87 } 88 89 resp, err := httpClient.Do(pypiRequest) 90 if err != nil { 91 return "", fmt.Errorf("unable to get package from pypi registry: %w", err) 92 } 93 defer func() { 94 if err := resp.Body.Close(); err != nil { 95 log.Errorf("unable to close body: %+v", err) 96 } 97 }() 98 99 if resp.StatusCode != 200 { 100 return "", fmt.Errorf("unable to get package from pypi registry") 101 } 102 103 bytes, err := io.ReadAll(resp.Body) 104 if err != nil { 105 return "", fmt.Errorf("unable to parse package from pypi registry: %w", err) 106 } 107 108 dec := json.NewDecoder(strings.NewReader(string(bytes))) 109 110 // Read "license" from the response 111 var pypiResponse struct { 112 Info struct { 113 License string `json:"license"` 114 LicenseExpression string `json:"license_expression"` 115 } `json:"info"` 116 } 117 118 if err := dec.Decode(&pypiResponse); err != nil { 119 return "", fmt.Errorf("unable to parse license from pypi registry: %w", err) 120 } 121 122 var license string 123 if pypiResponse.Info.LicenseExpression != "" { 124 license = pypiResponse.Info.LicenseExpression 125 } else { 126 license = pypiResponse.Info.License 127 } 128 log.Tracef("Retrieved License: %s", license) 129 130 return license, nil 131 }