github.com/google/osv-scalibr@v0.4.1/semantic/version-pypi.go (about)

     1  // Copyright 2025 Google LLC
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package semantic
    16  
    17  import (
    18  	"fmt"
    19  	"math/big"
    20  	"regexp"
    21  	"strings"
    22  )
    23  
var (
	// pypiLocalVersionSplitter matches a single ".", "_" or "-"; it is used to
	// split a local version label into its individual segments.
	pypiLocalVersionSplitter = regexp.MustCompile(`[._-]`)
	// pypiVersionPartsFinder tokenizes a legacy (non PEP 440) version string into
	// runs of digits, runs of lowercase letters, and single "." or "-" characters.
	pypiVersionPartsFinder   = regexp.MustCompile(`(\d+|[a-z]+|\.|-)`)
	// pypiVersionFinder matches a complete PEP 440 version and exposes its
	// segments through the named groups epoch, release, pre_l/pre_n,
	// post_n1/post_l/post_n2, dev_l/dev_n and local. It only contains lowercase
	// letters, so input has to be lowercased before matching.
	//
	// from https://peps.python.org/pep-0440/#appendix-b-parsing-version-strings-with-regular-expressions
	pypiVersionFinder = regexp.MustCompile(`^\s*v?(?:(?:(?P<epoch>[0-9]+)!)?(?P<release>[0-9]+(?:\.[0-9]+)*)(?P<pre>[-_\.]?(?P<pre_l>(a|b|c|rc|alpha|beta|pre|preview))[-_\.]?(?P<pre_n>[0-9]+)?)?(?P<post>(?:-(?P<post_n1>[0-9]+))|(?:[-_\.]?(?P<post_l>post|rev|r)[-_\.]?(?P<post_n2>[0-9]+)?))?(?P<dev>[-_\.]?(?P<dev_l>dev)[-_\.]?(?P<dev_n>[0-9]+)?)?)(?:\+(?P<local>[a-z0-9]+(?:[-_\.][a-z0-9]+)*))?\s*$`)
)
    30  
// pyPIVersion is the parsed form of a PyPI version string.
//
// A version is either a PEP 440 version, in which case the epoch, release,
// pre, post, dev and local fields describe it, or a legacy version, in which
// case only legacy (and a sentinel epoch) are populated.
type pyPIVersion struct {
	// epoch is 0 when the version string carries no explicit epoch, and -1 for
	// legacy versions.
	epoch   *big.Int
	// release holds the dot-separated numeric components of the release segment.
	release components
	// pre is the pre-release segment; its number is nil when there is none.
	pre     letterAndNumber
	// post is the post-release segment; its number is nil when there is none.
	post    letterAndNumber
	// dev is the development-release segment; its number is nil when there is none.
	dev     letterAndNumber
	// local holds the lowercased segments of the local version label.
	local   []string
	// legacy holds the normalized parts of a version that did not match the
	// PEP 440 pattern; it is empty for PEP 440 versions.
	legacy  []string
}
    40  
// letterAndNumber is a pre-, post- or dev-release segment: a normalized phase
// label together with its numeral.
//
// The zero value (empty letter, nil number) means the segment is absent; the
// comparison methods test number against nil to detect that.
type letterAndNumber struct {
	// letter is the lowercased, normalized label (for example "a", "b", "rc",
	// "post" or "dev").
	letter string
	// number is the numeral attached to the label, or nil when the segment is absent.
	number *big.Int
}
    45  
    46  func parseLetterVersion(letter, number string) (letterAndNumber, error) {
    47  	if letter != "" {
    48  		// we consider there to be an implicit 0 in a pre-release
    49  		// if there is not a numeral associated with it
    50  		if number == "" {
    51  			number = "0"
    52  		}
    53  
    54  		// we normalize any letters to their lowercase form
    55  		letter = strings.ToLower(letter)
    56  
    57  		// we consider some words to be alternative spellings of other words and in
    58  		// those cases we want to normalize the spellings to our preferred spelling
    59  		switch letter {
    60  		case "alpha":
    61  			letter = "a"
    62  		case "beta":
    63  			letter = "b"
    64  		case "c":
    65  			fallthrough
    66  		case "pre":
    67  			fallthrough
    68  		case "preview":
    69  			letter = "rc"
    70  		case "rev":
    71  			fallthrough
    72  		case "r":
    73  			letter = "post"
    74  		}
    75  
    76  		num, err := convertToBigInt(number)
    77  
    78  		if err != nil {
    79  			return letterAndNumber{}, err
    80  		}
    81  
    82  		return letterAndNumber{letter, num}, nil
    83  	}
    84  
    85  	if number != "" {
    86  		// we assume if we're given a number but not a letter then this is using
    87  		// the implicit post release syntax (e.g. 1.0-1)
    88  		letter = "post"
    89  
    90  		num, err := convertToBigInt(number)
    91  
    92  		if err != nil {
    93  			return letterAndNumber{}, err
    94  		}
    95  
    96  		return letterAndNumber{letter, num}, nil
    97  	}
    98  
    99  	return letterAndNumber{}, nil
   100  }
   101  
   102  func parseLocalVersion(local string) (parts []string) {
   103  	for _, part := range pypiLocalVersionSplitter.Split(local, -1) {
   104  		parts = append(parts, strings.ToLower(part))
   105  	}
   106  
   107  	return parts
   108  }
   109  
   110  func normalizePyPILegacyPart(part string) string {
   111  	switch part {
   112  	case "pre":
   113  		part = "c"
   114  	case "preview":
   115  		part = "c"
   116  	case "-":
   117  		part = "final-"
   118  	case "rc":
   119  		part = "c"
   120  	case "dev":
   121  		part = "@"
   122  	}
   123  
   124  	if isASCIIDigit(rune(part[0])) {
   125  		// pad for numeric comparison
   126  		return fmt.Sprintf("%08s", part)
   127  	}
   128  
   129  	return "*" + part
   130  }
   131  
   132  func parsePyPIVersionParts(str string) (parts []string) {
   133  	splits := pypiVersionPartsFinder.FindAllString(str, -1)
   134  	splits = append(splits, "final")
   135  
   136  	for _, part := range splits {
   137  		if part == "" || part == "." {
   138  			continue
   139  		}
   140  
   141  		part = normalizePyPILegacyPart(part)
   142  
   143  		if strings.HasPrefix(part, "*") {
   144  			if strings.Compare(part, "*final") < 0 {
   145  				for len(parts) > 0 && parts[len(parts)-1] == "*final-" {
   146  					parts = parts[:len(parts)-1]
   147  				}
   148  			}
   149  
   150  			for len(parts) > 0 && parts[len(parts)-1] == "00000000" {
   151  				parts = parts[:len(parts)-1]
   152  			}
   153  		}
   154  
   155  		parts = append(parts, part)
   156  	}
   157  
   158  	return parts
   159  }
   160  
   161  func parsePyPILegacyVersion(str string) pyPIVersion {
   162  	parts := parsePyPIVersionParts(str)
   163  
   164  	return pyPIVersion{epoch: big.NewInt(-1), legacy: parts}
   165  }
   166  
   167  func parsePyPIVersion(str string) (pyPIVersion, error) {
   168  	str = strings.ToLower(str)
   169  
   170  	match := pypiVersionFinder.FindStringSubmatch(str)
   171  
   172  	if len(match) == 0 {
   173  		return parsePyPILegacyVersion(str), nil
   174  	}
   175  
   176  	var version pyPIVersion
   177  
   178  	version.epoch = big.NewInt(0)
   179  
   180  	if epStr := match[pypiVersionFinder.SubexpIndex("epoch")]; epStr != "" {
   181  		epoch, err := convertToBigInt(epStr)
   182  
   183  		if err != nil {
   184  			return pyPIVersion{}, err
   185  		}
   186  
   187  		version.epoch = epoch
   188  	}
   189  
   190  	for r := range strings.SplitSeq(match[pypiVersionFinder.SubexpIndex("release")], ".") {
   191  		release, err := convertToBigInt(r)
   192  
   193  		if err != nil {
   194  			return pyPIVersion{}, err
   195  		}
   196  
   197  		version.release = append(version.release, release)
   198  	}
   199  
   200  	pre, err := parseLetterVersion(match[pypiVersionFinder.SubexpIndex("pre_l")], match[pypiVersionFinder.SubexpIndex("pre_n")])
   201  
   202  	if err != nil {
   203  		return pyPIVersion{}, err
   204  	}
   205  
   206  	version.pre = pre
   207  
   208  	post := match[pypiVersionFinder.SubexpIndex("post_n1")]
   209  
   210  	if post == "" {
   211  		post = match[pypiVersionFinder.SubexpIndex("post_n2")]
   212  	}
   213  
   214  	post2, err := parseLetterVersion(match[pypiVersionFinder.SubexpIndex("post_l")], post)
   215  
   216  	if err != nil {
   217  		return pyPIVersion{}, err
   218  	}
   219  
   220  	version.post = post2
   221  
   222  	dev, err := parseLetterVersion(match[pypiVersionFinder.SubexpIndex("dev_l")], match[pypiVersionFinder.SubexpIndex("dev_n")])
   223  
   224  	if err != nil {
   225  		return pyPIVersion{}, err
   226  	}
   227  
   228  	version.dev = dev
   229  	version.local = parseLocalVersion(match[pypiVersionFinder.SubexpIndex("local")])
   230  
   231  	return version, nil
   232  }
   233  
// compareEpoch compares the epoch segments of each version.
//
// It returns the result of big.Int.Cmp on the two epochs: -1 when pv's epoch
// is lower than pw's, 0 when they are equal and +1 when it is higher.
// Both epochs must be non-nil; parsePyPIVersion uses 0 when no epoch is given
// and parsePyPILegacyVersion uses -1.
func (pv pyPIVersion) compareEpoch(pw pyPIVersion) int {
	return pv.epoch.Cmp(pw.epoch)
}
   238  
// compareRelease compares the release segments of each version, which considers
// the numeric value of each component in turn; when comparing release segments
// with different numbers of components, the shorter segment is padded out with
// additional zeros as necessary.
//
// The comparison itself is delegated to components.Cmp.
// NOTE(review): the zero padding described above is expected to happen inside
// components.Cmp, which is not defined in this file - confirm there.
func (pv pyPIVersion) compareRelease(pw pyPIVersion) int {
	return pv.release.Cmp(pw.release)
}
   245  
   246  // Checks if this pyPIVersion should apply a sort trick when comparing pre,
   247  // which ensures that i.e. 1.0.dev0 is before 1.0a0.
   248  func (pv pyPIVersion) shouldApplyPreTrick() bool {
   249  	return pv.pre.number == nil && pv.post.number == nil && pv.dev.number != nil
   250  }
   251  
   252  // Compares the pre-release segment of each version, which consist of an alphabetical
   253  // identifier for the pre-release phase, along with a non-negative integer value.
   254  //
   255  // Pre-releases for a given release are ordered first by phase (alpha, beta, release
   256  // candidate) and then by the numerical component within that phase.
   257  //
   258  // Versions without a pre-release are sorted after those with one.
   259  func (pv pyPIVersion) comparePre(pw pyPIVersion) int {
   260  	switch {
   261  	case pv.shouldApplyPreTrick() && pw.shouldApplyPreTrick():
   262  		return +0
   263  	case pv.shouldApplyPreTrick():
   264  		return -1
   265  	case pw.shouldApplyPreTrick():
   266  		return +1
   267  	case pv.pre.number == nil && pw.pre.number == nil:
   268  		return +0
   269  	case pv.pre.number == nil:
   270  		return +1
   271  	case pw.pre.number == nil:
   272  		return -1
   273  	default:
   274  		ai := pv.pre.letter[0]
   275  		bi := pw.pre.letter[0]
   276  
   277  		if ai > bi {
   278  			return +1
   279  		}
   280  		if ai < bi {
   281  			return -1
   282  		}
   283  
   284  		return pv.pre.number.Cmp(pw.pre.number)
   285  	}
   286  }
   287  
   288  // Compares the post-release segment of each version.
   289  //
   290  // Post-releases are ordered by their numerical component, immediately following
   291  // the corresponding release, and ahead of any subsequent release.
   292  //
   293  // Versions without a post segment are sorted before those with one.
   294  func (pv pyPIVersion) comparePost(pw pyPIVersion) int {
   295  	switch {
   296  	case pv.post.number == nil && pw.post.number == nil:
   297  		return +0
   298  	case pv.post.number == nil:
   299  		return -1
   300  	case pw.post.number == nil:
   301  		return +1
   302  	default:
   303  		return pv.post.number.Cmp(pw.post.number)
   304  	}
   305  }
   306  
   307  // Compares the dev-release segment of each version, which consists of the string
   308  // ".dev" followed by a non-negative integer value.
   309  //
   310  // Developmental releases are ordered by their numerical component, immediately
   311  // before the corresponding release (and before any pre-releases with the same release segment),
   312  // and following any previous release (including any post-releases).
   313  //
   314  // Versions without a development segment are sorted after those with one.
   315  func (pv pyPIVersion) compareDev(pw pyPIVersion) int {
   316  	switch {
   317  	case pv.dev.number == nil && pw.dev.number == nil:
   318  		return +0
   319  	case pv.dev.number == nil:
   320  		return +1
   321  	case pw.dev.number == nil:
   322  		return -1
   323  	default:
   324  		return pv.dev.number.Cmp(pw.dev.number)
   325  	}
   326  }
   327  
   328  // Compares the local segment of each version
   329  func (pv pyPIVersion) compareLocal(pw pyPIVersion) int {
   330  	minVersionLength := min(len(pv.local), len(pw.local))
   331  
   332  	var compare int
   333  
   334  	for i := range minVersionLength {
   335  		ai, aErr := convertToBigInt(pv.local[i])
   336  		bi, bErr := convertToBigInt(pw.local[i])
   337  
   338  		switch {
   339  		// If a segment consists entirely of ASCII digits then that section should be considered an integer for comparison purposes
   340  		case aErr == nil && bErr == nil:
   341  			compare = ai.Cmp(bi)
   342  		// If a segment contains any ASCII letters then that segment is compared lexicographically with case insensitivity.
   343  		case aErr != nil && bErr != nil:
   344  			compare = strings.Compare(pv.local[i], pw.local[i])
   345  		// When comparing a numeric and lexicographic segment, the numeric section always compares as greater than the lexicographic segment.
   346  		case aErr == nil:
   347  			compare = +1
   348  		default:
   349  			compare = -1
   350  		}
   351  
   352  		if compare != 0 {
   353  			if compare > 0 {
   354  				return 1
   355  			}
   356  
   357  			return -1
   358  		}
   359  	}
   360  
   361  	// Additionally a local version with a great number of segments will always compare as greater than a local version with fewer segments,
   362  	// as long as the shorter local version’s segments match the beginning of the longer local version’s segments exactly.
   363  	if len(pv.local) > len(pw.local) {
   364  		return +1
   365  	}
   366  	if len(pv.local) < len(pw.local) {
   367  		return -1
   368  	}
   369  
   370  	return 0
   371  }
   372  
   373  // Compares the legacy segment of each version.
   374  //
   375  // These are versions that predate and are incompatible with PEP 440 - comparing
   376  // is "best effort" since there isn't a strong specification defined, and are
   377  // always considered lower than PEP 440 versions to match current day tooling.
   378  //
   379  // http://peak.telecommunity.com/DevCenter/setuptools#specifying-your-project-s-version
   380  // looks like a good reference, but unsure where it sits in the actual tooling history
   381  func (pv pyPIVersion) compareLegacy(pw pyPIVersion) int {
   382  	if len(pv.legacy) == 0 && len(pw.legacy) == 0 {
   383  		return +0
   384  	}
   385  	if len(pv.legacy) == 0 && len(pw.legacy) != 0 {
   386  		return +1
   387  	}
   388  	if len(pv.legacy) != 0 && len(pw.legacy) == 0 {
   389  		return -1
   390  	}
   391  
   392  	return strings.Compare(
   393  		strings.Join(pv.legacy, ""),
   394  		strings.Join(pw.legacy, ""),
   395  	)
   396  }
   397  
   398  func pypiCompareVersion(v, w pyPIVersion) int {
   399  	if legacyDiff := v.compareLegacy(w); legacyDiff != 0 {
   400  		return legacyDiff
   401  	}
   402  	if epochDiff := v.compareEpoch(w); epochDiff != 0 {
   403  		return epochDiff
   404  	}
   405  	if releaseDiff := v.compareRelease(w); releaseDiff != 0 {
   406  		return releaseDiff
   407  	}
   408  	if preDiff := v.comparePre(w); preDiff != 0 {
   409  		return preDiff
   410  	}
   411  	if postDiff := v.comparePost(w); postDiff != 0 {
   412  		return postDiff
   413  	}
   414  	if devDiff := v.compareDev(w); devDiff != 0 {
   415  		return devDiff
   416  	}
   417  	if localDiff := v.compareLocal(w); localDiff != 0 {
   418  		return localDiff
   419  	}
   420  
   421  	return 0
   422  }
   423  
// compare orders pv relative to pw by delegating to pypiCompareVersion; the
// result is negative, zero or positive when pv is lower than, equal to or
// greater than pw.
func (pv pyPIVersion) compare(pw pyPIVersion) int {
	return pypiCompareVersion(pv, pw)
}
   427  
   428  func (pv pyPIVersion) CompareStr(str string) (int, error) {
   429  	pw, err := parsePyPIVersion(str)
   430  
   431  	if err != nil {
   432  		return 0, err
   433  	}
   434  
   435  	return pv.compare(pw), nil
   436  }