github.com/google/osv-scalibr@v0.4.1/clients/resolution/pypi_registry_client.go (about)

     1  // Copyright 2025 Google LLC
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package resolution
    16  
    17  import (
    18  	"bytes"
    19  	"context"
    20  	"fmt"
    21  	"path/filepath"
    22  	"slices"
    23  	"strings"
    24  
    25  	"deps.dev/util/pypi"
    26  	"deps.dev/util/resolve"
    27  	"deps.dev/util/resolve/dep"
    28  	"deps.dev/util/resolve/version"
    29  	"deps.dev/util/semver"
    30  	"github.com/google/osv-scalibr/clients/datasource"
    31  	internalpypi "github.com/google/osv-scalibr/clients/internal/pypi"
    32  	"github.com/google/osv-scalibr/log"
    33  )
    34  
// PyPIRegistryClient is a client to fetch data from PyPI registry.
//
// It is a thin layer over datasource.PyPIRegistryAPIClient: the methods on this
// type fetch a package index (and, where needed, distribution files) through
// the wrapped client and translate the results into resolve types.
type PyPIRegistryClient struct {
	// api is the underlying registry API client used for all index and file lookups.
	api *datasource.PyPIRegistryAPIClient
}
    39  
    40  // NewPyPIRegistryClient makes a new PyPIRegistryClient.
    41  func NewPyPIRegistryClient(registry string, localRegistry string) *PyPIRegistryClient {
    42  	return &PyPIRegistryClient{api: datasource.NewPyPIRegistryAPIClient(registry, localRegistry)}
    43  }
    44  
// SetLocalRegistry sets the local directory that stores the downloaded PyPI manifests.
//
// The value is forwarded as-is to the underlying API client.
func (c *PyPIRegistryClient) SetLocalRegistry(localRegistry string) {
	c.api.SetLocalRegistry(localRegistry)
}
    49  
    50  // Version returns metadata of a version specified by the VersionKey.
    51  func (c *PyPIRegistryClient) Version(ctx context.Context, vk resolve.VersionKey) (resolve.Version, error) {
    52  	// Version is not used by the PyPI resolver for now, so here
    53  	// only returns the VersionKey with yanked or not.
    54  	// We may need to add more metadata in the future.
    55  	resp, err := c.api.GetIndex(ctx, vk.Name)
    56  	if err != nil {
    57  		return resolve.Version{}, err
    58  	}
    59  
    60  	files := lookupFile(vk, resp.Name, resp.Files)
    61  	if len(files) == 0 {
    62  		return resolve.Version{}, fmt.Errorf("no file found for package %s version %s", vk.Name, vk.Version)
    63  	}
    64  
    65  	ver := resolve.Version{VersionKey: vk}
    66  	if files[0].Yanked.Value {
    67  		// Assume this version is yanked if the first file is yanked.
    68  		var yanked version.AttrSet
    69  		yanked.SetAttr(version.Blocked, "")
    70  		ver.AttrSet = yanked
    71  	}
    72  	return ver, nil
    73  }
    74  
    75  // Versions returns all the available versions of the package specified by the given PackageKey.
    76  func (c *PyPIRegistryClient) Versions(ctx context.Context, pk resolve.PackageKey) ([]resolve.Version, error) {
    77  	resp, err := c.api.GetIndex(ctx, pk.Name)
    78  	if err != nil {
    79  		return nil, err
    80  	}
    81  
    82  	slices.SortFunc(resp.Versions, func(a, b string) int { return semver.PyPI.Compare(a, b) })
    83  
    84  	var yanked version.AttrSet
    85  	yanked.SetAttr(version.Blocked, "")
    86  
    87  	yankedVersions := make(map[string]bool)
    88  	for _, file := range resp.Files {
    89  		if !file.Yanked.Value {
    90  			continue
    91  		}
    92  		var v string
    93  		switch filepath.Ext(file.Name) {
    94  		case ".gz":
    95  			_, v, err = pypi.SdistVersion(resp.Name, file.Name)
    96  			if err != nil {
    97  				log.Warnf("failed to extract version from sdist file name %s: %v", file.Name, err)
    98  				continue
    99  			}
   100  		case ".whl":
   101  			info, err := pypi.ParseWheelName(file.Name)
   102  			if err != nil {
   103  				log.Warnf("failed to parse wheel name %s: %v", file.Name, err)
   104  				continue
   105  			}
   106  			v = info.Version
   107  		case ".egg":
   108  			v, err = versionFromEggFilename(file.Name)
   109  			if err != nil {
   110  				log.Warnf("failed to extract version from file %s: %v", file.Name, err)
   111  				continue
   112  			}
   113  		case ".zip":
   114  			v, err = versionFromZipFilename(file.Name)
   115  			if err != nil {
   116  				log.Warnf("failed to extract version from file %s: %v", file.Name, err)
   117  				continue
   118  			}
   119  		default:
   120  			continue
   121  		}
   122  		// If a file is yanked, assume this version is yanked.
   123  		yankedVersions[v] = true
   124  	}
   125  
   126  	var versions []resolve.Version
   127  	for _, ver := range resp.Versions {
   128  		v := resolve.Version{
   129  			VersionKey: resolve.VersionKey{
   130  				PackageKey:  pk,
   131  				Version:     ver,
   132  				VersionType: resolve.Concrete,
   133  			},
   134  		}
   135  		if yankedVersions[ver] {
   136  			v.AttrSet = yanked
   137  		}
   138  		versions = append(versions, v)
   139  	}
   140  
   141  	return versions, nil
   142  }
   143  
   144  // versionFromZipFilename extracts the version from a PyPI .zip filename.
   145  func versionFromZipFilename(filename string) (version string, err error) {
   146  	baseName := strings.TrimSuffix(filename, ".zip")
   147  	lastHyphenIndex := strings.LastIndex(baseName, "-")
   148  	if lastHyphenIndex == -1 {
   149  		// No hyphen found, likely just a package name without a version or invalid format
   150  		return "", fmt.Errorf("could not find version in filename: %s", filename)
   151  	}
   152  	return baseName[lastHyphenIndex+1:], nil
   153  }
   154  
   155  // versionFromEggFilename extracts the version from an .egg filename.
   156  func versionFromEggFilename(filename string) (version string, err error) {
   157  	baseName := strings.TrimSuffix(filename, ".egg")
   158  	pyTagIndex := strings.LastIndex(baseName, "-py")
   159  
   160  	if pyTagIndex == -1 {
   161  		// If no '-py' tag is found, treat it like a simple 'package-name-version.egg' format.
   162  		lastHyphenIndex := strings.LastIndex(baseName, "-")
   163  		if lastHyphenIndex == -1 {
   164  			return "", fmt.Errorf("could not find version in filename: %s", filename)
   165  		}
   166  		version = baseName[lastHyphenIndex+1:]
   167  	} else {
   168  		// Standard egg file name format: 'package_name-version-pyX.Y'
   169  		// The part before '-py' contains "package_name-version".
   170  		nameAndVersion := baseName[:pyTagIndex]
   171  
   172  		lastHyphenInNameAndVersion := strings.LastIndex(nameAndVersion, "-")
   173  		if lastHyphenInNameAndVersion == -1 {
   174  			// No hyphen found that indicates an unexpected format
   175  			return "", fmt.Errorf("could not find version in filename: %s", filename)
   176  		}
   177  		version = nameAndVersion[lastHyphenInNameAndVersion+1:]
   178  	}
   179  	return version, nil
   180  }
   181  
   182  // Requirements returns requirements of a version specified by the VersionKey.
   183  func (c *PyPIRegistryClient) Requirements(ctx context.Context, vk resolve.VersionKey) ([]resolve.RequirementVersion, error) {
   184  	resp, err := c.api.GetIndex(ctx, vk.Name)
   185  	if err != nil {
   186  		return nil, err
   187  	}
   188  
   189  	// We choose the first file that matches the specified version.
   190  	// TODO(#845): select the release file based on some criteria (e.g. platform)
   191  	files := lookupFile(vk, resp.Name, resp.Files)
   192  	// For each file, parse the metadata. If there is an error, try the next file until some requirements are found.
   193  	for _, file := range files {
   194  		data, err := c.api.GetFile(ctx, file.URL)
   195  		if err != nil {
   196  			log.Warnf("failed to get file %s: %v", file.Name, err)
   197  			continue
   198  		}
   199  
   200  		var metadata *pypi.Metadata
   201  		switch ext := filepath.Ext(file.Name); ext {
   202  		case ".gz":
   203  			metadata, err = pypi.SdistMetadata(ctx, file.Name, bytes.NewReader(data))
   204  		case ".whl":
   205  			metadata, err = pypi.WheelMetadata(ctx, bytes.NewReader(data), int64(len(data)))
   206  		default:
   207  			log.Infof("unsupported file extension for requirements: %s", ext)
   208  			continue
   209  		}
   210  		if err != nil {
   211  			log.Warnf("failed to parse metadata for file %s: %v", file.Name, err)
   212  			continue
   213  		}
   214  
   215  		var reqs []resolve.RequirementVersion
   216  		for _, d := range metadata.Dependencies {
   217  			t := dep.NewType()
   218  			if d.Extras != "" {
   219  				t.AddAttr(dep.EnabledDependencies, d.Extras)
   220  			}
   221  			if d.Environment != "" {
   222  				t.AddAttr(dep.Environment, d.Environment)
   223  			}
   224  
   225  			reqs = append(reqs, resolve.RequirementVersion{
   226  				VersionKey: resolve.VersionKey{
   227  					PackageKey: resolve.PackageKey{
   228  						System: resolve.PyPI,
   229  						Name:   d.Name,
   230  					},
   231  					Version:     d.Constraint,
   232  					VersionType: resolve.Requirement,
   233  				},
   234  				Type: t,
   235  			})
   236  		}
   237  
   238  		return reqs, nil
   239  	}
   240  
   241  	return nil, fmt.Errorf("no file can be used for parsing requirements for package %s version %s", vk.Name, vk.Version)
   242  }
   243  
   244  // lookupFile searches for all file that matches the given version from the list of available distribution files.
   245  func lookupFile(vk resolve.VersionKey, name string, files []internalpypi.File) []internalpypi.File {
   246  	var matches []internalpypi.File
   247  	for _, file := range files {
   248  		ext := filepath.Ext(file.Name)
   249  		switch ext {
   250  		case ".gz":
   251  			_, v, err := pypi.SdistVersion(name, file.Name)
   252  			if err != nil {
   253  				log.Warnf("failed to extract version from sdist file name %s: %v", file.Name, err)
   254  				continue
   255  			}
   256  			if v != vk.Version {
   257  				continue
   258  			}
   259  		case ".whl":
   260  			info, err := pypi.ParseWheelName(file.Name)
   261  			if err != nil {
   262  				log.Warnf("failed to parse wheel name %s: %v", file.Name, err)
   263  				continue
   264  			}
   265  			if info.Version != vk.Version {
   266  				continue
   267  			}
   268  		case ".egg":
   269  			v, err := versionFromEggFilename(file.Name)
   270  			if err != nil {
   271  				log.Warnf("failed to extract version from file %s: %v", file.Name, err)
   272  				continue
   273  			}
   274  			if v != vk.Version {
   275  				continue
   276  			}
   277  		case ".zip":
   278  			v, err := versionFromZipFilename(file.Name)
   279  			if err != nil {
   280  				log.Warnf("failed to extract version from file %s: %v", file.Name, err)
   281  				continue
   282  			}
   283  			if v != vk.Version {
   284  				continue
   285  			}
   286  		default:
   287  			continue
   288  		}
   289  		matches = append(matches, file)
   290  	}
   291  	return matches
   292  }
   293  
   294  // MatchingVersions returns versions matching the requirement specified by the VersionKey.
   295  func (c *PyPIRegistryClient) MatchingVersions(ctx context.Context, vk resolve.VersionKey) ([]resolve.Version, error) {
   296  	versions, err := c.Versions(ctx, vk.PackageKey)
   297  	if err != nil {
   298  		return nil, err
   299  	}
   300  
   301  	return resolve.MatchRequirement(vk, versions), nil
   302  }