github.com/gernest/nezuko@v0.1.2/internal/module/module.go (about)

     1  // Copyright 2018 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  // Package module defines the module.Version type
     6  // along with support code.
     7  package module
     8  
     9  // IMPORTANT NOTE
    10  //
    11  // This file essentially defines the set of valid import paths for the go command.
    12  // There are many subtle considerations, including Unicode ambiguity,
    13  // security, network, and file system representations.
    14  //
    15  // This file also defines the set of valid module path and version combinations,
    16  // another topic with many subtle considerations.
    17  //
    18  // Changes to the semantics in this file require approval from rsc.
    19  
    20  import (
    21  	"fmt"
    22  	"sort"
    23  	"strings"
    24  	"unicode"
    25  	"unicode/utf8"
    26  
    27  	"github.com/gernest/nezuko/internal/semver"
    28  )
    29  
    30  // A Version is defined by a module path and version pair.
    31  type Version struct {
    32  	Path string
    33  
    34  	// Version is usually a semantic version in canonical form.
    35  	// There are two exceptions to this general rule.
    36  	// First, the top-level target of a build has no specific version
    37  	// and uses Version = "".
    38  	// Second, during MVS calculations the version "none" is used
    39  	// to represent the decision to take no version of a given module.
    40  	Version string `json:",omitempty"`
    41  }
    42  
    43  // Check checks that a given module path, version pair is valid.
    44  // In addition to the path being a valid module path
    45  // and the version being a valid semantic version,
    46  // the two must correspond.
    47  // For example, the path "yaml/v2" only corresponds to
    48  // semantic versions beginning with "v2.".
    49  func Check(path, version string) error {
    50  	if err := CheckPath(path); err != nil {
    51  		return err
    52  	}
    53  	if !semver.IsValid(version) {
    54  		return fmt.Errorf("malformed semantic version %v", version)
    55  	}
    56  	_, pathMajor, _ := SplitPathVersion(path)
    57  	if !MatchPathMajor(version, pathMajor) {
    58  		if pathMajor == "" {
    59  			pathMajor = "v0 or v1"
    60  		}
    61  		if pathMajor[0] == '.' { // .v1
    62  			pathMajor = pathMajor[1:]
    63  		}
    64  		return fmt.Errorf("mismatched module path %v and version %v (want %v)", path, version, pathMajor)
    65  	}
    66  	return nil
    67  }
    68  
    69  // firstPathOK reports whether r can appear in the first element of a module path.
    70  // The first element of the path must be an LDH domain name, at least for now.
    71  // To avoid case ambiguity, the domain name must be entirely lower case.
    72  func firstPathOK(r rune) bool {
    73  	return r == '-' || r == '.' ||
    74  		'0' <= r && r <= '9' ||
    75  		'a' <= r && r <= 'z'
    76  }
    77  
    78  // pathOK reports whether r can appear in an import path element.
    79  // Paths can be ASCII letters, ASCII digits, and limited ASCII punctuation: + - . _ and ~.
    80  // This matches what "go get" has historically recognized in import paths.
    81  // TODO(rsc): We would like to allow Unicode letters, but that requires additional
    82  // care in the safe encoding (see note below).
    83  func pathOK(r rune) bool {
    84  	if r < utf8.RuneSelf {
    85  		return r == '+' || r == '-' || r == '.' || r == '_' || r == '~' ||
    86  			'0' <= r && r <= '9' ||
    87  			'A' <= r && r <= 'Z' ||
    88  			'a' <= r && r <= 'z'
    89  	}
    90  	return false
    91  }
    92  
    93  // fileNameOK reports whether r can appear in a file name.
    94  // For now we allow all Unicode letters but otherwise limit to pathOK plus a few more punctuation characters.
    95  // If we expand the set of allowed characters here, we have to
    96  // work harder at detecting potential case-folding and normalization collisions.
    97  // See note about "safe encoding" below.
    98  func fileNameOK(r rune) bool {
    99  	if r < utf8.RuneSelf {
   100  		// Entire set of ASCII punctuation, from which we remove characters:
   101  		//     ! " # $ % & ' ( ) * + , - . / : ; < = > ? @ [ \ ] ^ _ ` { | } ~
   102  		// We disallow some shell special characters: " ' * < > ? ` |
   103  		// (Note that some of those are disallowed by the Windows file system as well.)
   104  		// We also disallow path separators / : and \ (fileNameOK is only called on path element characters).
   105  		// We allow spaces (U+0020) in file names.
   106  		const allowed = "!#$%&()+,-.=@[]^_{}~ "
   107  		if '0' <= r && r <= '9' || 'A' <= r && r <= 'Z' || 'a' <= r && r <= 'z' {
   108  			return true
   109  		}
   110  		for i := 0; i < len(allowed); i++ {
   111  			if rune(allowed[i]) == r {
   112  				return true
   113  			}
   114  		}
   115  		return false
   116  	}
   117  	// It may be OK to add more ASCII punctuation here, but only carefully.
   118  	// For example Windows disallows < > \, and macOS disallows :, so we must not allow those.
   119  	return unicode.IsLetter(r)
   120  }
   121  
   122  // CheckPath checks that a module path is valid.
   123  func CheckPath(path string) error {
   124  	if err := checkPath(path, false); err != nil {
   125  		return fmt.Errorf("malformed module path %q: %v", path, err)
   126  	}
   127  	i := strings.Index(path, "/")
   128  	if i < 0 {
   129  		i = len(path)
   130  	}
   131  	if i == 0 {
   132  		return fmt.Errorf("malformed module path %q: leading slash", path)
   133  	}
   134  	if !strings.Contains(path[:i], ".") {
   135  		return fmt.Errorf("malformed module path %q: missing dot in first path element", path)
   136  	}
   137  	if path[0] == '-' {
   138  		return fmt.Errorf("malformed module path %q: leading dash in first path element", path)
   139  	}
   140  	for _, r := range path[:i] {
   141  		if !firstPathOK(r) {
   142  			return fmt.Errorf("malformed module path %q: invalid char %q in first path element", path, r)
   143  		}
   144  	}
   145  	if _, _, ok := SplitPathVersion(path); !ok {
   146  		return fmt.Errorf("malformed module path %q: invalid version", path)
   147  	}
   148  	return nil
   149  }
   150  
   151  // CheckImportPath checks that an import path is valid.
   152  func CheckImportPath(path string) error {
   153  	if err := checkPath(path, false); err != nil {
   154  		return fmt.Errorf("malformed import path %q: %v", path, err)
   155  	}
   156  	return nil
   157  }
   158  
   159  // checkPath checks that a general path is valid.
   160  // It returns an error describing why but not mentioning path.
   161  // Because these checks apply to both module paths and import paths,
   162  // the caller is expected to add the "malformed ___ path %q: " prefix.
   163  // fileName indicates whether the final element of the path is a file name
   164  // (as opposed to a directory name).
   165  func checkPath(path string, fileName bool) error {
   166  	if !utf8.ValidString(path) {
   167  		return fmt.Errorf("invalid UTF-8")
   168  	}
   169  	if path == "" {
   170  		return fmt.Errorf("empty string")
   171  	}
   172  	if strings.Contains(path, "..") {
   173  		return fmt.Errorf("double dot")
   174  	}
   175  	if strings.Contains(path, "//") {
   176  		return fmt.Errorf("double slash")
   177  	}
   178  	if path[len(path)-1] == '/' {
   179  		return fmt.Errorf("trailing slash")
   180  	}
   181  	elemStart := 0
   182  	for i, r := range path {
   183  		if r == '/' {
   184  			if err := checkElem(path[elemStart:i], fileName); err != nil {
   185  				return err
   186  			}
   187  			elemStart = i + 1
   188  		}
   189  	}
   190  	if err := checkElem(path[elemStart:], fileName); err != nil {
   191  		return err
   192  	}
   193  	return nil
   194  }
   195  
   196  // checkElem checks whether an individual path element is valid.
   197  // fileName indicates whether the element is a file name (not a directory name).
   198  func checkElem(elem string, fileName bool) error {
   199  	if elem == "" {
   200  		return fmt.Errorf("empty path element")
   201  	}
   202  	if strings.Count(elem, ".") == len(elem) {
   203  		return fmt.Errorf("invalid path element %q", elem)
   204  	}
   205  	if elem[0] == '.' && !fileName {
   206  		return fmt.Errorf("leading dot in path element")
   207  	}
   208  	if elem[len(elem)-1] == '.' {
   209  		return fmt.Errorf("trailing dot in path element")
   210  	}
   211  	charOK := pathOK
   212  	if fileName {
   213  		charOK = fileNameOK
   214  	}
   215  	for _, r := range elem {
   216  		if !charOK(r) {
   217  			return fmt.Errorf("invalid char %q", r)
   218  		}
   219  	}
   220  
   221  	// Windows disallows a bunch of path elements, sadly.
   222  	// See https://docs.microsoft.com/en-us/windows/desktop/fileio/naming-a-file
   223  	short := elem
   224  	if i := strings.Index(short, "."); i >= 0 {
   225  		short = short[:i]
   226  	}
   227  	for _, bad := range badWindowsNames {
   228  		if strings.EqualFold(bad, short) {
   229  			return fmt.Errorf("%q disallowed as path element component on Windows", short)
   230  		}
   231  	}
   232  	return nil
   233  }
   234  
   235  // CheckFilePath checks whether a slash-separated file path is valid.
   236  func CheckFilePath(path string) error {
   237  	if err := checkPath(path, true); err != nil {
   238  		return fmt.Errorf("malformed file path %q: %v", path, err)
   239  	}
   240  	return nil
   241  }
   242  
   243  // badWindowsNames are the reserved file path elements on Windows.
   244  // See https://docs.microsoft.com/en-us/windows/desktop/fileio/naming-a-file
   245  var badWindowsNames = []string{
   246  	"CON",
   247  	"PRN",
   248  	"AUX",
   249  	"NUL",
   250  	"COM1",
   251  	"COM2",
   252  	"COM3",
   253  	"COM4",
   254  	"COM5",
   255  	"COM6",
   256  	"COM7",
   257  	"COM8",
   258  	"COM9",
   259  	"LPT1",
   260  	"LPT2",
   261  	"LPT3",
   262  	"LPT4",
   263  	"LPT5",
   264  	"LPT6",
   265  	"LPT7",
   266  	"LPT8",
   267  	"LPT9",
   268  }
   269  
   270  // SplitPathVersion returns prefix and major version such that prefix+pathMajor == path
   271  // and version is either empty or "/vN" for N >= 2.
   272  // As a special case, gopkg.in paths are recognized directly;
   273  // they require ".vN" instead of "/vN", and for all N, not just N >= 2.
   274  func SplitPathVersion(path string) (prefix, pathMajor string, ok bool) {
   275  	if strings.HasPrefix(path, "gopkg.in/") {
   276  		return splitGopkgIn(path)
   277  	}
   278  
   279  	i := len(path)
   280  	dot := false
   281  	for i > 0 && ('0' <= path[i-1] && path[i-1] <= '9' || path[i-1] == '.') {
   282  		if path[i-1] == '.' {
   283  			dot = true
   284  		}
   285  		i--
   286  	}
   287  	if i <= 1 || i == len(path) || path[i-1] != 'v' || path[i-2] != '/' {
   288  		return path, "", true
   289  	}
   290  	prefix, pathMajor = path[:i-2], path[i-2:]
   291  	if dot || len(pathMajor) <= 2 || pathMajor[2] == '0' || pathMajor == "/v1" {
   292  		return path, "", false
   293  	}
   294  	return prefix, pathMajor, true
   295  }
   296  
   297  // splitGopkgIn is like SplitPathVersion but only for gopkg.in paths.
   298  func splitGopkgIn(path string) (prefix, pathMajor string, ok bool) {
   299  	if !strings.HasPrefix(path, "gopkg.in/") {
   300  		return path, "", false
   301  	}
   302  	i := len(path)
   303  	if strings.HasSuffix(path, "-unstable") {
   304  		i -= len("-unstable")
   305  	}
   306  	for i > 0 && ('0' <= path[i-1] && path[i-1] <= '9') {
   307  		i--
   308  	}
   309  	if i <= 1 || path[i-1] != 'v' || path[i-2] != '.' {
   310  		// All gopkg.in paths must end in vN for some N.
   311  		return path, "", false
   312  	}
   313  	prefix, pathMajor = path[:i-2], path[i-2:]
   314  	if len(pathMajor) <= 2 || pathMajor[2] == '0' && pathMajor != ".v0" {
   315  		return path, "", false
   316  	}
   317  	return prefix, pathMajor, true
   318  }
   319  
   320  // MatchPathMajor reports whether the semantic version v
   321  // matches the path major version pathMajor.
   322  func MatchPathMajor(v, pathMajor string) bool {
   323  	if strings.HasPrefix(pathMajor, ".v") && strings.HasSuffix(pathMajor, "-unstable") {
   324  		pathMajor = strings.TrimSuffix(pathMajor, "-unstable")
   325  	}
   326  	if strings.HasPrefix(v, "v0.0.0-") && pathMajor == ".v1" {
   327  		// Allow old bug in pseudo-versions that generated v0.0.0- pseudoversion for gopkg .v1.
   328  		// For example, gopkg.in/yaml.v2@v2.2.1's z.mod requires gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405.
   329  		return true
   330  	}
   331  	m := semver.Major(v)
   332  	if pathMajor == "" {
   333  		return m == "v0" || m == "v1" || semver.Build(v) == "+incompatible"
   334  	}
   335  	return (pathMajor[0] == '/' || pathMajor[0] == '.') && m == pathMajor[1:]
   336  }
   337  
   338  // CanonicalVersion returns the canonical form of the version string v.
   339  // It is the same as semver.Canonical(v) except that it preserves the special build suffix "+incompatible".
   340  func CanonicalVersion(v string) string {
   341  	cv := semver.Canonical(v)
   342  	if semver.Build(v) == "+incompatible" {
   343  		cv += "+incompatible"
   344  	}
   345  	return cv
   346  }
   347  
   348  // Sort sorts the list by Path, breaking ties by comparing Versions.
   349  func Sort(list []Version) {
   350  	sort.Slice(list, func(i, j int) bool {
   351  		mi := list[i]
   352  		mj := list[j]
   353  		if mi.Path != mj.Path {
   354  			return mi.Path < mj.Path
   355  		}
   356  		// To help go.sum formatting, allow version/file.
   357  		// Compare semver prefix by semver rules,
   358  		// file by string order.
   359  		vi := mi.Version
   360  		vj := mj.Version
   361  		var fi, fj string
   362  		if k := strings.Index(vi, "/"); k >= 0 {
   363  			vi, fi = vi[:k], vi[k:]
   364  		}
   365  		if k := strings.Index(vj, "/"); k >= 0 {
   366  			vj, fj = vj[:k], vj[k:]
   367  		}
   368  		if vi != vj {
   369  			return semver.Compare(vi, vj) < 0
   370  		}
   371  		return fi < fj
   372  	})
   373  }
   374  
   375  // Safe encodings
   376  //
   377  // Module paths appear as substrings of file system paths
   378  // (in the download cache) and of web server URLs in the proxy protocol.
   379  // In general we cannot rely on file systems to be case-sensitive,
   380  // nor can we rely on web servers, since they read from file systems.
   381  // That is, we cannot rely on the file system to keep rsc.io/QUOTE
   382  // and rsc.io/quote separate. Windows and macOS don't.
   383  // Instead, we must never require two different casings of a file path.
   384  // Because we want the download cache to match the proxy protocol,
   385  // and because we want the proxy protocol to be possible to serve
   386  // from a tree of static files (which might be stored on a case-insensitive
   387  // file system), the proxy protocol must never require two different casings
   388  // of a URL path either.
   389  //
   390  // One possibility would be to make the safe encoding be the lowercase
   391  // hexadecimal encoding of the actual path bytes. This would avoid ever
   392  // needing different casings of a file path, but it would be fairly illegible
   393  // to most programmers when those paths appeared in the file system
   394  // (including in file paths in compiler errors and stack traces)
   395  // in web server logs, and so on. Instead, we want a safe encoding that
   396  // leaves most paths unaltered.
   397  //
   398  // The safe encoding is this:
   399  // replace every uppercase letter with an exclamation mark
   400  // followed by the letter's lowercase equivalent.
   401  //
   402  // For example,
// github.com/Azure/azure-sdk-for-go -> github.com/!azure/azure-sdk-for-go
// github.com/GoogleCloudPlatform/cloudsql-proxy -> github.com/!google!cloud!platform/cloudsql-proxy
// github.com/Sirupsen/logrus -> github.com/!sirupsen/logrus
   406  //
   407  // Import paths that avoid upper-case letters are left unchanged.
   408  // Note that because import paths are ASCII-only and avoid various
   409  // problematic punctuation (like : < and >), the safe encoding is also ASCII-only
   410  // and avoids the same problematic punctuation.
   411  //
   412  // Import paths have never allowed exclamation marks, so there is no
   413  // need to define how to encode a literal !.
   414  //
   415  // Although paths are disallowed from using Unicode (see pathOK above),
   416  // the eventual plan is to allow Unicode letters as well, to assume that
   417  // file systems and URLs are Unicode-safe (storing UTF-8), and apply
   418  // the !-for-uppercase convention. Note however that not all runes that
   419  // are different but case-fold equivalent are an upper/lower pair.
   420  // For example, U+004B ('K'), U+006B ('k'), and U+212A ('K' for Kelvin)
   421  // are considered to case-fold to each other. When we do add Unicode
   422  // letters, we must not assume that upper/lower are the only case-equivalent pairs.
   423  // Perhaps the Kelvin symbol would be disallowed entirely, for example.
   424  // Or perhaps it would encode as "!!k", or perhaps as "(212A)".
   425  //
   426  // Also, it would be nice to allow Unicode marks as well as letters,
   427  // but marks include combining marks, and then we must deal not
   428  // only with case folding but also normalization: both U+00E9 ('é')
   429  // and U+0065 U+0301 ('e' followed by combining acute accent)
   430  // look the same on the page and are treated by some file systems
   431  // as the same path. If we do allow Unicode marks in paths, there
   432  // must be some kind of normalization to allow only one canonical
   433  // encoding of any character used in an import path.
   434  
   435  // EncodePath returns the safe encoding of the given module path.
   436  // It fails if the module path is invalid.
   437  func EncodePath(path string) (encoding string, err error) {
   438  	if err := CheckPath(path); err != nil {
   439  		return "", err
   440  	}
   441  
   442  	return encodeString(path)
   443  }
   444  
   445  // EncodeVersion returns the safe encoding of the given module version.
   446  // Versions are allowed to be in non-semver form but must be valid file names
   447  // and not contain exclamation marks.
   448  func EncodeVersion(v string) (encoding string, err error) {
   449  	if err := checkElem(v, true); err != nil || strings.Contains(v, "!") {
   450  		return "", fmt.Errorf("disallowed version string %q", v)
   451  	}
   452  	return encodeString(v)
   453  }
   454  
   455  func encodeString(s string) (encoding string, err error) {
   456  	haveUpper := false
   457  	for _, r := range s {
   458  		if r == '!' || r >= utf8.RuneSelf {
   459  			// This should be disallowed by CheckPath, but diagnose anyway.
   460  			// The correctness of the encoding loop below depends on it.
   461  			return "", fmt.Errorf("internal error: inconsistency in EncodePath")
   462  		}
   463  		if 'A' <= r && r <= 'Z' {
   464  			haveUpper = true
   465  		}
   466  	}
   467  
   468  	if !haveUpper {
   469  		return s, nil
   470  	}
   471  
   472  	var buf []byte
   473  	for _, r := range s {
   474  		if 'A' <= r && r <= 'Z' {
   475  			buf = append(buf, '!', byte(r+'a'-'A'))
   476  		} else {
   477  			buf = append(buf, byte(r))
   478  		}
   479  	}
   480  	return string(buf), nil
   481  }
   482  
   483  // DecodePath returns the module path of the given safe encoding.
   484  // It fails if the encoding is invalid or encodes an invalid path.
   485  func DecodePath(encoding string) (path string, err error) {
   486  	path, ok := decodeString(encoding)
   487  	if !ok {
   488  		return "", fmt.Errorf("invalid module path encoding %q", encoding)
   489  	}
   490  	if err := CheckPath(path); err != nil {
   491  		return "", fmt.Errorf("invalid module path encoding %q: %v", encoding, err)
   492  	}
   493  	return path, nil
   494  }
   495  
   496  // DecodeVersion returns the version string for the given safe encoding.
   497  // It fails if the encoding is invalid or encodes an invalid version.
   498  // Versions are allowed to be in non-semver form but must be valid file names
   499  // and not contain exclamation marks.
   500  func DecodeVersion(encoding string) (v string, err error) {
   501  	v, ok := decodeString(encoding)
   502  	if !ok {
   503  		return "", fmt.Errorf("invalid version encoding %q", encoding)
   504  	}
   505  	if err := checkElem(v, true); err != nil {
   506  		return "", fmt.Errorf("disallowed version string %q", v)
   507  	}
   508  	return v, nil
   509  }
   510  
   511  func decodeString(encoding string) (string, bool) {
   512  	var buf []byte
   513  
   514  	bang := false
   515  	for _, r := range encoding {
   516  		if r >= utf8.RuneSelf {
   517  			return "", false
   518  		}
   519  		if bang {
   520  			bang = false
   521  			if r < 'a' || 'z' < r {
   522  				return "", false
   523  			}
   524  			buf = append(buf, byte(r+'A'-'a'))
   525  			continue
   526  		}
   527  		if r == '!' {
   528  			bang = true
   529  			continue
   530  		}
   531  		if 'A' <= r && r <= 'Z' {
   532  			return "", false
   533  		}
   534  		buf = append(buf, byte(r))
   535  	}
   536  	if bang {
   537  		return "", false
   538  	}
   539  	return string(buf), true
   540  }