github.com/google/osv-scalibr@v0.4.1/clients/resolution/pypi_registry_client.go (about) 1 // Copyright 2025 Google LLC 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package resolution 16 17 import ( 18 "bytes" 19 "context" 20 "fmt" 21 "path/filepath" 22 "slices" 23 "strings" 24 25 "deps.dev/util/pypi" 26 "deps.dev/util/resolve" 27 "deps.dev/util/resolve/dep" 28 "deps.dev/util/resolve/version" 29 "deps.dev/util/semver" 30 "github.com/google/osv-scalibr/clients/datasource" 31 internalpypi "github.com/google/osv-scalibr/clients/internal/pypi" 32 "github.com/google/osv-scalibr/log" 33 ) 34 35 // PyPIRegistryClient is a client to fetch data from PyPI registry. 36 type PyPIRegistryClient struct { 37 api *datasource.PyPIRegistryAPIClient 38 } 39 40 // NewPyPIRegistryClient makes a new PyPIRegistryClient. 41 func NewPyPIRegistryClient(registry string, localRegistry string) *PyPIRegistryClient { 42 return &PyPIRegistryClient{api: datasource.NewPyPIRegistryAPIClient(registry, localRegistry)} 43 } 44 45 // SetLocalRegistry sets the local directory that stores the downloaded PyPI manifests. 46 func (c *PyPIRegistryClient) SetLocalRegistry(localRegistry string) { 47 c.api.SetLocalRegistry(localRegistry) 48 } 49 50 // Version returns metadata of a version specified by the VersionKey. 51 func (c *PyPIRegistryClient) Version(ctx context.Context, vk resolve.VersionKey) (resolve.Version, error) { 52 // Version is not used by the PyPI resolver for now, so here 53 // only returns the VersionKey with yanked or not. 54 // We may need to add more metadata in the future. 55 resp, err := c.api.GetIndex(ctx, vk.Name) 56 if err != nil { 57 return resolve.Version{}, err 58 } 59 60 files := lookupFile(vk, resp.Name, resp.Files) 61 if len(files) == 0 { 62 return resolve.Version{}, fmt.Errorf("no file found for package %s version %s", vk.Name, vk.Version) 63 } 64 65 ver := resolve.Version{VersionKey: vk} 66 if files[0].Yanked.Value { 67 // Assume this version is yanked if the first file is yanked. 68 var yanked version.AttrSet 69 yanked.SetAttr(version.Blocked, "") 70 ver.AttrSet = yanked 71 } 72 return ver, nil 73 } 74 75 // Versions returns all the available versions of the package specified by the given PackageKey. 76 func (c *PyPIRegistryClient) Versions(ctx context.Context, pk resolve.PackageKey) ([]resolve.Version, error) { 77 resp, err := c.api.GetIndex(ctx, pk.Name) 78 if err != nil { 79 return nil, err 80 } 81 82 slices.SortFunc(resp.Versions, func(a, b string) int { return semver.PyPI.Compare(a, b) }) 83 84 var yanked version.AttrSet 85 yanked.SetAttr(version.Blocked, "") 86 87 yankedVersions := make(map[string]bool) 88 for _, file := range resp.Files { 89 if !file.Yanked.Value { 90 continue 91 } 92 var v string 93 switch filepath.Ext(file.Name) { 94 case ".gz": 95 _, v, err = pypi.SdistVersion(resp.Name, file.Name) 96 if err != nil { 97 log.Warnf("failed to extract version from sdist file name %s: %v", file.Name, err) 98 continue 99 } 100 case ".whl": 101 info, err := pypi.ParseWheelName(file.Name) 102 if err != nil { 103 log.Warnf("failed to parse wheel name %s: %v", file.Name, err) 104 continue 105 } 106 v = info.Version 107 case ".egg": 108 v, err = versionFromEggFilename(file.Name) 109 if err != nil { 110 log.Warnf("failed to extract version from file %s: %v", file.Name, err) 111 continue 112 } 113 case ".zip": 114 v, err = versionFromZipFilename(file.Name) 115 if err != nil { 116 log.Warnf("failed to extract version from file %s: %v", file.Name, err) 117 continue 118 } 119 default: 120 continue 121 } 122 // If a file is yanked, assume this version is yanked. 123 yankedVersions[v] = true 124 } 125 126 var versions []resolve.Version 127 for _, ver := range resp.Versions { 128 v := resolve.Version{ 129 VersionKey: resolve.VersionKey{ 130 PackageKey: pk, 131 Version: ver, 132 VersionType: resolve.Concrete, 133 }, 134 } 135 if yankedVersions[ver] { 136 v.AttrSet = yanked 137 } 138 versions = append(versions, v) 139 } 140 141 return versions, nil 142 } 143 144 // versionFromZipFilename extracts the version from a PyPI .zip filename. 145 func versionFromZipFilename(filename string) (version string, err error) { 146 baseName := strings.TrimSuffix(filename, ".zip") 147 lastHyphenIndex := strings.LastIndex(baseName, "-") 148 if lastHyphenIndex == -1 { 149 // No hyphen found, likely just a package name without a version or invalid format 150 return "", fmt.Errorf("could not find version in filename: %s", filename) 151 } 152 return baseName[lastHyphenIndex+1:], nil 153 } 154 155 // versionFromEggFilename extracts the version from an .egg filename. 156 func versionFromEggFilename(filename string) (version string, err error) { 157 baseName := strings.TrimSuffix(filename, ".egg") 158 pyTagIndex := strings.LastIndex(baseName, "-py") 159 160 if pyTagIndex == -1 { 161 // If no '-py' tag is found, treat it like a simple 'package-name-version.egg' format. 162 lastHyphenIndex := strings.LastIndex(baseName, "-") 163 if lastHyphenIndex == -1 { 164 return "", fmt.Errorf("could not find version in filename: %s", filename) 165 } 166 version = baseName[lastHyphenIndex+1:] 167 } else { 168 // Standard egg file name format: 'package_name-version-pyX.Y' 169 // The part before '-py' contains "package_name-version". 170 nameAndVersion := baseName[:pyTagIndex] 171 172 lastHyphenInNameAndVersion := strings.LastIndex(nameAndVersion, "-") 173 if lastHyphenInNameAndVersion == -1 { 174 // No hyphen found that indicates an unexpected format 175 return "", fmt.Errorf("could not find version in filename: %s", filename) 176 } 177 version = nameAndVersion[lastHyphenInNameAndVersion+1:] 178 } 179 return version, nil 180 } 181 182 // Requirements returns requirements of a version specified by the VersionKey. 183 func (c *PyPIRegistryClient) Requirements(ctx context.Context, vk resolve.VersionKey) ([]resolve.RequirementVersion, error) { 184 resp, err := c.api.GetIndex(ctx, vk.Name) 185 if err != nil { 186 return nil, err 187 } 188 189 // We choose the first file that matches the specified version. 190 // TODO(#845): select the release file based on some criteria (e.g. platform) 191 files := lookupFile(vk, resp.Name, resp.Files) 192 // For each file, parse the metadata. If there is an error, try the next file until some requirements are found. 193 for _, file := range files { 194 data, err := c.api.GetFile(ctx, file.URL) 195 if err != nil { 196 log.Warnf("failed to get file %s: %v", file.Name, err) 197 continue 198 } 199 200 var metadata *pypi.Metadata 201 switch ext := filepath.Ext(file.Name); ext { 202 case ".gz": 203 metadata, err = pypi.SdistMetadata(ctx, file.Name, bytes.NewReader(data)) 204 case ".whl": 205 metadata, err = pypi.WheelMetadata(ctx, bytes.NewReader(data), int64(len(data))) 206 default: 207 log.Infof("unsupported file extension for requirements: %s", ext) 208 continue 209 } 210 if err != nil { 211 log.Warnf("failed to parse metadata for file %s: %v", file.Name, err) 212 continue 213 } 214 215 var reqs []resolve.RequirementVersion 216 for _, d := range metadata.Dependencies { 217 t := dep.NewType() 218 if d.Extras != "" { 219 t.AddAttr(dep.EnabledDependencies, d.Extras) 220 } 221 if d.Environment != "" { 222 t.AddAttr(dep.Environment, d.Environment) 223 } 224 225 reqs = append(reqs, resolve.RequirementVersion{ 226 VersionKey: resolve.VersionKey{ 227 PackageKey: resolve.PackageKey{ 228 System: resolve.PyPI, 229 Name: d.Name, 230 }, 231 Version: d.Constraint, 232 VersionType: resolve.Requirement, 233 }, 234 Type: t, 235 }) 236 } 237 238 return reqs, nil 239 } 240 241 return nil, fmt.Errorf("no file can be used for parsing requirements for package %s version %s", vk.Name, vk.Version) 242 } 243 244 // lookupFile searches for all file that matches the given version from the list of available distribution files. 245 func lookupFile(vk resolve.VersionKey, name string, files []internalpypi.File) []internalpypi.File { 246 var matches []internalpypi.File 247 for _, file := range files { 248 ext := filepath.Ext(file.Name) 249 switch ext { 250 case ".gz": 251 _, v, err := pypi.SdistVersion(name, file.Name) 252 if err != nil { 253 log.Warnf("failed to extract version from sdist file name %s: %v", file.Name, err) 254 continue 255 } 256 if v != vk.Version { 257 continue 258 } 259 case ".whl": 260 info, err := pypi.ParseWheelName(file.Name) 261 if err != nil { 262 log.Warnf("failed to parse wheel name %s: %v", file.Name, err) 263 continue 264 } 265 if info.Version != vk.Version { 266 continue 267 } 268 case ".egg": 269 v, err := versionFromEggFilename(file.Name) 270 if err != nil { 271 log.Warnf("failed to extract version from file %s: %v", file.Name, err) 272 continue 273 } 274 if v != vk.Version { 275 continue 276 } 277 case ".zip": 278 v, err := versionFromZipFilename(file.Name) 279 if err != nil { 280 log.Warnf("failed to extract version from file %s: %v", file.Name, err) 281 continue 282 } 283 if v != vk.Version { 284 continue 285 } 286 default: 287 continue 288 } 289 matches = append(matches, file) 290 } 291 return matches 292 } 293 294 // MatchingVersions returns versions matching the requirement specified by the VersionKey. 295 func (c *PyPIRegistryClient) MatchingVersions(ctx context.Context, vk resolve.VersionKey) ([]resolve.Version, error) { 296 versions, err := c.Versions(ctx, vk.PackageKey) 297 if err != nil { 298 return nil, err 299 } 300 301 return resolve.MatchRequirement(vk, versions), nil 302 }