github.com/google/osv-scalibr@v0.4.1/semantic/version-pypi.go (about) 1 // Copyright 2025 Google LLC 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package semantic 16 17 import ( 18 "fmt" 19 "math/big" 20 "regexp" 21 "strings" 22 ) 23 24 var ( 25 pypiLocalVersionSplitter = regexp.MustCompile(`[._-]`) 26 pypiVersionPartsFinder = regexp.MustCompile(`(\d+|[a-z]+|\.|-)`) 27 // from https://peps.python.org/pep-0440/#appendix-b-parsing-version-strings-with-regular-expressions 28 pypiVersionFinder = regexp.MustCompile(`^\s*v?(?:(?:(?P<epoch>[0-9]+)!)?(?P<release>[0-9]+(?:\.[0-9]+)*)(?P<pre>[-_\.]?(?P<pre_l>(a|b|c|rc|alpha|beta|pre|preview))[-_\.]?(?P<pre_n>[0-9]+)?)?(?P<post>(?:-(?P<post_n1>[0-9]+))|(?:[-_\.]?(?P<post_l>post|rev|r)[-_\.]?(?P<post_n2>[0-9]+)?))?(?P<dev>[-_\.]?(?P<dev_l>dev)[-_\.]?(?P<dev_n>[0-9]+)?)?)(?:\+(?P<local>[a-z0-9]+(?:[-_\.][a-z0-9]+)*))?\s*$`) 29 ) 30 31 type pyPIVersion struct { 32 epoch *big.Int 33 release components 34 pre letterAndNumber 35 post letterAndNumber 36 dev letterAndNumber 37 local []string 38 legacy []string 39 } 40 41 type letterAndNumber struct { 42 letter string 43 number *big.Int 44 } 45 46 func parseLetterVersion(letter, number string) (letterAndNumber, error) { 47 if letter != "" { 48 // we consider there to be an implicit 0 in a pre-release 49 // if there is not a numeral associated with it 50 if number == "" { 51 number = "0" 52 } 53 54 // we normalize any letters to their lowercase form 55 letter = strings.ToLower(letter) 56 57 // we consider some words to be alternative spellings of other words and in 58 // those cases we want to normalize the spellings to our preferred spelling 59 switch letter { 60 case "alpha": 61 letter = "a" 62 case "beta": 63 letter = "b" 64 case "c": 65 fallthrough 66 case "pre": 67 fallthrough 68 case "preview": 69 letter = "rc" 70 case "rev": 71 fallthrough 72 case "r": 73 letter = "post" 74 } 75 76 num, err := convertToBigInt(number) 77 78 if err != nil { 79 return letterAndNumber{}, err 80 } 81 82 return letterAndNumber{letter, num}, nil 83 } 84 85 if number != "" { 86 // we assume if we're given a number but not a letter then this is using 87 // the implicit post release syntax (e.g. 1.0-1) 88 letter = "post" 89 90 num, err := convertToBigInt(number) 91 92 if err != nil { 93 return letterAndNumber{}, err 94 } 95 96 return letterAndNumber{letter, num}, nil 97 } 98 99 return letterAndNumber{}, nil 100 } 101 102 func parseLocalVersion(local string) (parts []string) { 103 for _, part := range pypiLocalVersionSplitter.Split(local, -1) { 104 parts = append(parts, strings.ToLower(part)) 105 } 106 107 return parts 108 } 109 110 func normalizePyPILegacyPart(part string) string { 111 switch part { 112 case "pre": 113 part = "c" 114 case "preview": 115 part = "c" 116 case "-": 117 part = "final-" 118 case "rc": 119 part = "c" 120 case "dev": 121 part = "@" 122 } 123 124 if isASCIIDigit(rune(part[0])) { 125 // pad for numeric comparison 126 return fmt.Sprintf("%08s", part) 127 } 128 129 return "*" + part 130 } 131 132 func parsePyPIVersionParts(str string) (parts []string) { 133 splits := pypiVersionPartsFinder.FindAllString(str, -1) 134 splits = append(splits, "final") 135 136 for _, part := range splits { 137 if part == "" || part == "." { 138 continue 139 } 140 141 part = normalizePyPILegacyPart(part) 142 143 if strings.HasPrefix(part, "*") { 144 if strings.Compare(part, "*final") < 0 { 145 for len(parts) > 0 && parts[len(parts)-1] == "*final-" { 146 parts = parts[:len(parts)-1] 147 } 148 } 149 150 for len(parts) > 0 && parts[len(parts)-1] == "00000000" { 151 parts = parts[:len(parts)-1] 152 } 153 } 154 155 parts = append(parts, part) 156 } 157 158 return parts 159 } 160 161 func parsePyPILegacyVersion(str string) pyPIVersion { 162 parts := parsePyPIVersionParts(str) 163 164 return pyPIVersion{epoch: big.NewInt(-1), legacy: parts} 165 } 166 167 func parsePyPIVersion(str string) (pyPIVersion, error) { 168 str = strings.ToLower(str) 169 170 match := pypiVersionFinder.FindStringSubmatch(str) 171 172 if len(match) == 0 { 173 return parsePyPILegacyVersion(str), nil 174 } 175 176 var version pyPIVersion 177 178 version.epoch = big.NewInt(0) 179 180 if epStr := match[pypiVersionFinder.SubexpIndex("epoch")]; epStr != "" { 181 epoch, err := convertToBigInt(epStr) 182 183 if err != nil { 184 return pyPIVersion{}, err 185 } 186 187 version.epoch = epoch 188 } 189 190 for r := range strings.SplitSeq(match[pypiVersionFinder.SubexpIndex("release")], ".") { 191 release, err := convertToBigInt(r) 192 193 if err != nil { 194 return pyPIVersion{}, err 195 } 196 197 version.release = append(version.release, release) 198 } 199 200 pre, err := parseLetterVersion(match[pypiVersionFinder.SubexpIndex("pre_l")], match[pypiVersionFinder.SubexpIndex("pre_n")]) 201 202 if err != nil { 203 return pyPIVersion{}, err 204 } 205 206 version.pre = pre 207 208 post := match[pypiVersionFinder.SubexpIndex("post_n1")] 209 210 if post == "" { 211 post = match[pypiVersionFinder.SubexpIndex("post_n2")] 212 } 213 214 post2, err := parseLetterVersion(match[pypiVersionFinder.SubexpIndex("post_l")], post) 215 216 if err != nil { 217 return pyPIVersion{}, err 218 } 219 220 version.post = post2 221 222 dev, err := parseLetterVersion(match[pypiVersionFinder.SubexpIndex("dev_l")], match[pypiVersionFinder.SubexpIndex("dev_n")]) 223 224 if err != nil { 225 return pyPIVersion{}, err 226 } 227 228 version.dev = dev 229 version.local = parseLocalVersion(match[pypiVersionFinder.SubexpIndex("local")]) 230 231 return version, nil 232 } 233 234 // Compares the epoch segments of each version 235 func (pv pyPIVersion) compareEpoch(pw pyPIVersion) int { 236 return pv.epoch.Cmp(pw.epoch) 237 } 238 239 // Compares the release segments of each version, which considers the numeric value 240 // of each component in turn; when comparing release segments with different numbers 241 // of components, the shorter segment is padded out with additional zeros as necessary. 242 func (pv pyPIVersion) compareRelease(pw pyPIVersion) int { 243 return pv.release.Cmp(pw.release) 244 } 245 246 // Checks if this pyPIVersion should apply a sort trick when comparing pre, 247 // which ensures that i.e. 1.0.dev0 is before 1.0a0. 248 func (pv pyPIVersion) shouldApplyPreTrick() bool { 249 return pv.pre.number == nil && pv.post.number == nil && pv.dev.number != nil 250 } 251 252 // Compares the pre-release segment of each version, which consist of an alphabetical 253 // identifier for the pre-release phase, along with a non-negative integer value. 254 // 255 // Pre-releases for a given release are ordered first by phase (alpha, beta, release 256 // candidate) and then by the numerical component within that phase. 257 // 258 // Versions without a pre-release are sorted after those with one. 259 func (pv pyPIVersion) comparePre(pw pyPIVersion) int { 260 switch { 261 case pv.shouldApplyPreTrick() && pw.shouldApplyPreTrick(): 262 return +0 263 case pv.shouldApplyPreTrick(): 264 return -1 265 case pw.shouldApplyPreTrick(): 266 return +1 267 case pv.pre.number == nil && pw.pre.number == nil: 268 return +0 269 case pv.pre.number == nil: 270 return +1 271 case pw.pre.number == nil: 272 return -1 273 default: 274 ai := pv.pre.letter[0] 275 bi := pw.pre.letter[0] 276 277 if ai > bi { 278 return +1 279 } 280 if ai < bi { 281 return -1 282 } 283 284 return pv.pre.number.Cmp(pw.pre.number) 285 } 286 } 287 288 // Compares the post-release segment of each version. 289 // 290 // Post-releases are ordered by their numerical component, immediately following 291 // the corresponding release, and ahead of any subsequent release. 292 // 293 // Versions without a post segment are sorted before those with one. 294 func (pv pyPIVersion) comparePost(pw pyPIVersion) int { 295 switch { 296 case pv.post.number == nil && pw.post.number == nil: 297 return +0 298 case pv.post.number == nil: 299 return -1 300 case pw.post.number == nil: 301 return +1 302 default: 303 return pv.post.number.Cmp(pw.post.number) 304 } 305 } 306 307 // Compares the dev-release segment of each version, which consists of the string 308 // ".dev" followed by a non-negative integer value. 309 // 310 // Developmental releases are ordered by their numerical component, immediately 311 // before the corresponding release (and before any pre-releases with the same release segment), 312 // and following any previous release (including any post-releases). 313 // 314 // Versions without a development segment are sorted after those with one. 315 func (pv pyPIVersion) compareDev(pw pyPIVersion) int { 316 switch { 317 case pv.dev.number == nil && pw.dev.number == nil: 318 return +0 319 case pv.dev.number == nil: 320 return +1 321 case pw.dev.number == nil: 322 return -1 323 default: 324 return pv.dev.number.Cmp(pw.dev.number) 325 } 326 } 327 328 // Compares the local segment of each version 329 func (pv pyPIVersion) compareLocal(pw pyPIVersion) int { 330 minVersionLength := min(len(pv.local), len(pw.local)) 331 332 var compare int 333 334 for i := range minVersionLength { 335 ai, aErr := convertToBigInt(pv.local[i]) 336 bi, bErr := convertToBigInt(pw.local[i]) 337 338 switch { 339 // If a segment consists entirely of ASCII digits then that section should be considered an integer for comparison purposes 340 case aErr == nil && bErr == nil: 341 compare = ai.Cmp(bi) 342 // If a segment contains any ASCII letters then that segment is compared lexicographically with case insensitivity. 343 case aErr != nil && bErr != nil: 344 compare = strings.Compare(pv.local[i], pw.local[i]) 345 // When comparing a numeric and lexicographic segment, the numeric section always compares as greater than the lexicographic segment. 346 case aErr == nil: 347 compare = +1 348 default: 349 compare = -1 350 } 351 352 if compare != 0 { 353 if compare > 0 { 354 return 1 355 } 356 357 return -1 358 } 359 } 360 361 // Additionally a local version with a great number of segments will always compare as greater than a local version with fewer segments, 362 // as long as the shorter local version’s segments match the beginning of the longer local version’s segments exactly. 363 if len(pv.local) > len(pw.local) { 364 return +1 365 } 366 if len(pv.local) < len(pw.local) { 367 return -1 368 } 369 370 return 0 371 } 372 373 // Compares the legacy segment of each version. 374 // 375 // These are versions that predate and are incompatible with PEP 440 - comparing 376 // is "best effort" since there isn't a strong specification defined, and are 377 // always considered lower than PEP 440 versions to match current day tooling. 378 // 379 // http://peak.telecommunity.com/DevCenter/setuptools#specifying-your-project-s-version 380 // looks like a good reference, but unsure where it sits in the actual tooling history 381 func (pv pyPIVersion) compareLegacy(pw pyPIVersion) int { 382 if len(pv.legacy) == 0 && len(pw.legacy) == 0 { 383 return +0 384 } 385 if len(pv.legacy) == 0 && len(pw.legacy) != 0 { 386 return +1 387 } 388 if len(pv.legacy) != 0 && len(pw.legacy) == 0 { 389 return -1 390 } 391 392 return strings.Compare( 393 strings.Join(pv.legacy, ""), 394 strings.Join(pw.legacy, ""), 395 ) 396 } 397 398 func pypiCompareVersion(v, w pyPIVersion) int { 399 if legacyDiff := v.compareLegacy(w); legacyDiff != 0 { 400 return legacyDiff 401 } 402 if epochDiff := v.compareEpoch(w); epochDiff != 0 { 403 return epochDiff 404 } 405 if releaseDiff := v.compareRelease(w); releaseDiff != 0 { 406 return releaseDiff 407 } 408 if preDiff := v.comparePre(w); preDiff != 0 { 409 return preDiff 410 } 411 if postDiff := v.comparePost(w); postDiff != 0 { 412 return postDiff 413 } 414 if devDiff := v.compareDev(w); devDiff != 0 { 415 return devDiff 416 } 417 if localDiff := v.compareLocal(w); localDiff != 0 { 418 return localDiff 419 } 420 421 return 0 422 } 423 424 func (pv pyPIVersion) compare(pw pyPIVersion) int { 425 return pypiCompareVersion(pv, pw) 426 } 427 428 func (pv pyPIVersion) CompareStr(str string) (int, error) { 429 pw, err := parsePyPIVersion(str) 430 431 if err != nil { 432 return 0, err 433 } 434 435 return pv.compare(pw), nil 436 }