cuelang.org/go@v0.10.1/mod/module/path.go (about)

     1  package module
     2  
     3  import (
     4  	"errors"
     5  	"fmt"
     6  	"regexp"
     7  	"strings"
     8  	"sync"
     9  	"unicode"
    10  	"unicode/utf8"
    11  
    12  	"cuelang.org/go/cue/ast"
    13  	"cuelang.org/go/internal/mod/semver"
    14  )
    15  
    16  // The following regular expressions come from https://github.com/opencontainers/distribution-spec/blob/main/spec.md#pulling-manifests
    17  // and ensure that we can store modules inside OCI registries.
    18  var (
    19  	basePathPat = sync.OnceValue(func() *regexp.Regexp {
    20  		return regexp.MustCompile(`^[a-z0-9]+((\.|_|__|-+)[a-z0-9]+)*(/[a-z0-9]+((\.|_|__|-+)[a-z0-9]+)*)*$`)
    21  	})
    22  	tagPat = sync.OnceValue(func() *regexp.Regexp {
    23  		return regexp.MustCompile(`^[a-zA-Z0-9_][a-zA-Z0-9._-]{0,127}$`)
    24  	})
    25  )
    26  
    27  // Check checks that a given module path, version pair is valid.
    28  // In addition to the path being a valid module path
    29  // and the version being a valid semantic version,
    30  // the two must correspond.
    31  // For example, the path "foo.com/bar@v2" only corresponds to
    32  // semantic versions beginning with "v2.".
    33  func Check(path, version string) error {
    34  	if err := CheckPath(path); err != nil {
    35  		return err
    36  	}
    37  	if !semver.IsValid(version) {
    38  		return &ModuleError{
    39  			Path: path,
    40  			Err:  &InvalidVersionError{Version: version, Err: errors.New("not a semantic version")},
    41  		}
    42  	}
    43  	_, pathMajor, _ := SplitPathVersion(path)
    44  	if err := CheckPathMajor(version, pathMajor); err != nil {
    45  		return &ModuleError{Path: path, Err: err}
    46  	}
    47  	return nil
    48  }
    49  
    50  // firstPathOK reports whether r can appear in the first element of a module path.
    51  // The first element of the path must be an LDH domain name, at least for now.
    52  // To avoid case ambiguity, the domain name must be entirely lower case.
    53  func firstPathOK(r rune) bool {
    54  	return r == '-' || r == '.' ||
    55  		'0' <= r && r <= '9' ||
    56  		'a' <= r && r <= 'z'
    57  }
    58  
    59  // modPathOK reports whether r can appear in a module path element.
    60  // Paths can be ASCII letters, ASCII digits, and limited ASCII punctuation: - . _ and ~.
    61  func modPathOK(r rune) bool {
    62  	if r < utf8.RuneSelf {
    63  		return r == '-' || r == '.' || r == '_' ||
    64  			'0' <= r && r <= '9' ||
    65  			'a' <= r && r <= 'z'
    66  	}
    67  	return false
    68  }
    69  
    70  // importPathOK reports whether r can appear in a package import path element.
    71  //
    72  // Import paths are intermediate between module paths and file paths: we allow
    73  // disallow characters that would be confusing or ambiguous as arguments to
    74  // 'go get' (such as '@' and ' ' ), but allow certain characters that are
    75  // otherwise-unambiguous on the command line and historically used for some
    76  // binary names (such as '++' as a suffix for compiler binaries and wrappers).
    77  func importPathOK(r rune) bool {
    78  	return modPathOK(r) ||
    79  		r == '+' ||
    80  		r == '~' ||
    81  		'A' <= r && r <= 'Z'
    82  }
    83  
    84  // fileNameOK reports whether r can appear in a file name.
    85  // For now we allow all Unicode letters but otherwise limit to pathOK plus a few more punctuation characters.
    86  // If we expand the set of allowed characters here, we have to
    87  // work harder at detecting potential case-folding and normalization collisions.
    88  // See note about "escaped paths" above.
    89  func fileNameOK(r rune) bool {
    90  	if r < utf8.RuneSelf {
    91  		// Entire set of ASCII punctuation, from which we remove characters:
    92  		//     ! " # $ % & ' ( ) * + , - . / : ; < = > ? [ \ ] ^ _ ` { | } ~
    93  		// We disallow some shell special characters: " ' * < > ? ` |
    94  		// (Note that some of those are disallowed by the Windows file system as well.)
    95  		// We also disallow path separators / : and \ (fileNameOK is only called on path element characters).
    96  		// We allow spaces (U+0020) in file names.
    97  		const allowed = "!#$%&()+,-.=@[]^_{}~ "
    98  		if '0' <= r && r <= '9' || 'A' <= r && r <= 'Z' || 'a' <= r && r <= 'z' {
    99  			return true
   100  		}
   101  		return strings.ContainsRune(allowed, r)
   102  	}
   103  	// It may be OK to add more ASCII punctuation here, but only carefully.
   104  	// For example Windows disallows < > \, and macOS disallows :, so we must not allow those.
   105  	return unicode.IsLetter(r)
   106  }
   107  
   108  // CheckPathWithoutVersion is like CheckPath except that
   109  // it expects a module path without a major version.
   110  func CheckPathWithoutVersion(basePath string) (err error) {
   111  	if _, _, ok := SplitPathVersion(basePath); ok {
   112  		return fmt.Errorf("module path inappropriately contains major version")
   113  	}
   114  	if err := checkPath(basePath, modulePath); err != nil {
   115  		return err
   116  	}
   117  	i := strings.Index(basePath, "/")
   118  	if i < 0 {
   119  		i = len(basePath)
   120  	}
   121  	if i == 0 {
   122  		return fmt.Errorf("leading slash")
   123  	}
   124  	if !strings.Contains(basePath[:i], ".") {
   125  		return fmt.Errorf("missing dot in first path element")
   126  	}
   127  	if basePath[0] == '-' {
   128  		return fmt.Errorf("leading dash in first path element")
   129  	}
   130  	for _, r := range basePath[:i] {
   131  		if !firstPathOK(r) {
   132  			return fmt.Errorf("invalid char %q in first path element", r)
   133  		}
   134  	}
   135  	// Sanity check agreement with OCI specs.
   136  	if !basePathPat().MatchString(basePath) {
   137  		return fmt.Errorf("path does not conform to OCI repository name restrictions; see https://github.com/opencontainers/distribution-spec/blob/HEAD/spec.md#pulling-manifests")
   138  	}
   139  	return nil
   140  }
   141  
   142  // CheckPath checks that a module path is valid.
   143  // A valid module path is a valid import path, as checked by CheckImportPath,
   144  // with three additional constraints.
   145  //
   146  // First, the leading path element (up to the first slash, if any),
   147  // by convention a domain name, must contain only lower-case ASCII letters,
   148  // ASCII digits, dots (U+002E), and dashes (U+002D);
   149  // it must contain at least one dot and cannot start with a dash.
   150  //
   151  // Second, there may be a final major version of the form
   152  // @vN where N looks numeric
   153  // (ASCII digits) and must not begin with a leading zero.
   154  // Without such a major version, the major version is assumed
   155  // to be v0.
   156  //
   157  // Third, no path element may begin with a dot.
   158  func CheckPath(mpath string) (err error) {
   159  	if mpath == "local" {
   160  		return nil
   161  	}
   162  	defer func() {
   163  		if err != nil {
   164  			err = &InvalidPathError{Kind: "module", Path: mpath, Err: err}
   165  		}
   166  	}()
   167  
   168  	basePath, vers, ok := SplitPathVersion(mpath)
   169  	if ok {
   170  		if semver.Major(vers) != vers {
   171  			return fmt.Errorf("path can contain major version only")
   172  		}
   173  		if !tagPat().MatchString(vers) {
   174  			return fmt.Errorf("non-conforming version %q", vers)
   175  		}
   176  	} else {
   177  		basePath = mpath
   178  	}
   179  	if err := CheckPathWithoutVersion(basePath); err != nil {
   180  		return err
   181  	}
   182  	return nil
   183  }
   184  
   185  // CheckImportPath checks that an import path is valid.
   186  //
   187  // A valid import path consists of one or more valid path elements
   188  // separated by slashes (U+002F), optionally followed by
   189  // an @vN (major version) qualifier.
   190  // The path part must not begin with nor end in a slash.
   191  //
   192  // A valid path element is a non-empty string made up of
   193  // lower case ASCII letters, ASCII digits, and limited ASCII punctuation: - . and _
   194  // Punctuation characters may not be adjacent and must be between non-punctuation
   195  // characters.
   196  //
   197  // The element prefix up to the first dot must not be a reserved file name
   198  // on Windows, regardless of case (CON, com1, NuL, and so on).
   199  func CheckImportPath(path string) error {
   200  	parts := ParseImportPath(path)
   201  	if semver.Major(parts.Version) != parts.Version {
   202  		return &InvalidPathError{
   203  			Kind: "import",
   204  			Path: path,
   205  			Err:  fmt.Errorf("import paths can only contain a major version specifier"),
   206  		}
   207  	}
   208  	if err := checkPath(parts.Path, importPath); err != nil {
   209  		return &InvalidPathError{Kind: "import", Path: path, Err: err}
   210  	}
   211  	return nil
   212  }
   213  
// pathKind indicates what kind of path we're checking. Module paths,
// import paths, and file paths have different restrictions.
type pathKind int

const (
	// modulePath selects the module path rules: characters are checked
	// with modPathOK and no element may begin or end with '.', '_' or '-'.
	modulePath pathKind = iota
	// importPath selects the import path rules: characters are checked
	// with importPathOK and no element may end with a dot.
	importPath
	// filePath selects the file path rules: characters are checked with
	// fileNameOK, a leading dash is allowed, and the check for Windows
	// short-names is skipped.
	filePath
)
   223  
   224  // checkPath checks that a general path is valid. kind indicates what
   225  // specific constraints should be applied.
   226  //
   227  // checkPath returns an error describing why the path is not valid.
   228  // Because these checks apply to module, import, and file paths,
   229  // and because other checks may be applied, the caller is expected to wrap
   230  // this error with InvalidPathError.
   231  func checkPath(path string, kind pathKind) error {
   232  	if !utf8.ValidString(path) {
   233  		return fmt.Errorf("invalid UTF-8")
   234  	}
   235  	if path == "" {
   236  		return fmt.Errorf("empty string")
   237  	}
   238  	if path[0] == '-' && kind != filePath {
   239  		return fmt.Errorf("leading dash")
   240  	}
   241  	if strings.Contains(path, "//") {
   242  		return fmt.Errorf("double slash")
   243  	}
   244  	if path[len(path)-1] == '/' {
   245  		return fmt.Errorf("trailing slash")
   246  	}
   247  	elemStart := 0
   248  	for i, r := range path {
   249  		if r == '/' {
   250  			if err := checkElem(path[elemStart:i], kind); err != nil {
   251  				return err
   252  			}
   253  			elemStart = i + 1
   254  		}
   255  	}
   256  	if err := checkElem(path[elemStart:], kind); err != nil {
   257  		return err
   258  	}
   259  	return nil
   260  }
   261  
   262  // checkElem checks whether an individual path element is valid.
   263  func checkElem(elem string, kind pathKind) error {
   264  	if elem == "" {
   265  		return fmt.Errorf("empty path element")
   266  	}
   267  	if strings.Count(elem, ".") == len(elem) {
   268  		return fmt.Errorf("invalid path element %q", elem)
   269  	}
   270  
   271  	if kind == modulePath {
   272  
   273  		if r := rune(elem[0]); r == '.' || r == '_' || r == '-' {
   274  			return fmt.Errorf("leading %q in path element", r)
   275  		}
   276  		if r := rune(elem[len(elem)-1]); r == '.' || r == '_' || r == '-' {
   277  			return fmt.Errorf("trailing %q in path element", r)
   278  		}
   279  	} else if elem[len(elem)-1] == '.' {
   280  		return fmt.Errorf("trailing dot in path element")
   281  	}
   282  	for _, r := range elem {
   283  		ok := false
   284  		switch kind {
   285  		case modulePath:
   286  			ok = modPathOK(r)
   287  		case importPath:
   288  			ok = importPathOK(r)
   289  		case filePath:
   290  			ok = fileNameOK(r)
   291  		default:
   292  			panic(fmt.Sprintf("internal error: invalid kind %v", kind))
   293  		}
   294  		if !ok {
   295  			return fmt.Errorf("invalid char %q", r)
   296  		}
   297  	}
   298  	// Windows disallows a bunch of path elements, sadly.
   299  	// See https://docs.microsoft.com/en-us/windows/desktop/fileio/naming-a-file
   300  	short := elem
   301  	if i := strings.Index(short, "."); i >= 0 {
   302  		short = short[:i]
   303  	}
   304  	for _, bad := range badWindowsNames {
   305  		if strings.EqualFold(bad, short) {
   306  			return fmt.Errorf("%q disallowed as path element component on Windows", short)
   307  		}
   308  	}
   309  
   310  	if kind == filePath {
   311  		// don't check for Windows short-names in file names. They're
   312  		// only an issue for import paths.
   313  		return nil
   314  	}
   315  
   316  	// Reject path components that look like Windows short-names.
   317  	// Those usually end in a tilde followed by one or more ASCII digits.
   318  	if tilde := strings.LastIndexByte(short, '~'); tilde >= 0 && tilde < len(short)-1 {
   319  		suffix := short[tilde+1:]
   320  		suffixIsDigits := true
   321  		for _, r := range suffix {
   322  			if r < '0' || r > '9' {
   323  				suffixIsDigits = false
   324  				break
   325  			}
   326  		}
   327  		if suffixIsDigits {
   328  			return fmt.Errorf("trailing tilde and digits in path element")
   329  		}
   330  	}
   331  
   332  	return nil
   333  }
   334  
   335  // CheckFilePath checks that a slash-separated file path is valid.
   336  // The definition of a valid file path is the same as the definition
   337  // of a valid import path except that the set of allowed characters is larger:
   338  // all Unicode letters, ASCII digits, the ASCII space character (U+0020),
   339  // and the ASCII punctuation characters
   340  // “!#$%&()+,-.=@[]^_{}~”.
   341  // (The excluded punctuation characters, " * < > ? ` ' | / \ and :,
   342  // have special meanings in certain shells or operating systems.)
   343  //
   344  // CheckFilePath may be less restrictive in the future, but see the
   345  // top-level package documentation for additional information about
   346  // subtleties of Unicode.
   347  func CheckFilePath(path string) error {
   348  	if err := checkPath(path, filePath); err != nil {
   349  		return &InvalidPathError{Kind: "file", Path: path, Err: err}
   350  	}
   351  	return nil
   352  }
   353  
   354  // badWindowsNames are the reserved file path elements on Windows.
   355  // See https://docs.microsoft.com/en-us/windows/desktop/fileio/naming-a-file
   356  var badWindowsNames = []string{
   357  	"CON",
   358  	"PRN",
   359  	"AUX",
   360  	"NUL",
   361  	"COM1",
   362  	"COM2",
   363  	"COM3",
   364  	"COM4",
   365  	"COM5",
   366  	"COM6",
   367  	"COM7",
   368  	"COM8",
   369  	"COM9",
   370  	"LPT1",
   371  	"LPT2",
   372  	"LPT3",
   373  	"LPT4",
   374  	"LPT5",
   375  	"LPT6",
   376  	"LPT7",
   377  	"LPT8",
   378  	"LPT9",
   379  }
   380  
   381  // SplitPathVersion returns a prefix and version suffix such
   382  // that prefix+"@"+version == path.
   383  // SplitPathVersion returns with ok=false when presented
   384  // with a path with an invalid version suffix.
   385  //
   386  // For example, SplitPathVersion("foo.com/bar@v0.1") returns
   387  // ("foo.com/bar", "v0.1", true).
   388  func SplitPathVersion(path string) (prefix, version string, ok bool) {
   389  	i := strings.LastIndex(path, "@")
   390  	split := i
   391  	if i <= 0 || i+2 >= len(path) {
   392  		return "", "", false
   393  	}
   394  	if strings.Contains(path[:i], "@") {
   395  		return "", "", false
   396  	}
   397  	if path[i+1] != 'v' {
   398  		return "", "", false
   399  	}
   400  	if !semver.IsValid(path[i+1:]) {
   401  		return "", "", false
   402  	}
   403  	return path[:split], path[split+1:], true
   404  }
   405  
// ImportPath holds the various components of an import path.
//
// The String method renders them as Path, followed by "@" and Version
// when a version is set, followed by ":" and Qualifier when the
// qualifier needs to be shown.
type ImportPath struct {
	// Path holds the base package/directory path, similar
	// to that returned by [Version.BasePath].
	Path string

	// Version holds the version of the import
	// or empty if not present. Note: in general this
	// will contain a major version only, but there's no
	// guarantee of that.
	Version string

	// Qualifier holds the package qualifier within the path.
	// This will be derived from the last component of Path
	// if it wasn't explicitly present in the import path.
	// This is not guaranteed to be a valid CUE identifier.
	Qualifier string

	// ExplicitQualifier holds whether the qualifier will
	// always be added regardless of whether it matches
	// the final path element. ParseImportPath sets it when
	// the qualifier was spelled out after a ':' in its input.
	ExplicitQualifier bool
}
   429  
   430  // Canonical returns the canonical form of the import path.
   431  // Specifically, it will only include the package qualifier
   432  // if it's different from the last component of parts.Path.
   433  func (parts ImportPath) Canonical() ImportPath {
   434  	if i := strings.LastIndex(parts.Path, "/"); i >= 0 && parts.Path[i+1:] == parts.Qualifier {
   435  		parts.Qualifier = ""
   436  		parts.ExplicitQualifier = false
   437  	}
   438  	return parts
   439  }
   440  
   441  // Unqualified returns the import path without any package qualifier.
   442  func (parts ImportPath) Unqualified() ImportPath {
   443  	parts.Qualifier = ""
   444  	parts.ExplicitQualifier = false
   445  	return parts
   446  }
   447  
   448  func (parts ImportPath) String() string {
   449  	needQualifier := parts.ExplicitQualifier
   450  	if !needQualifier && parts.Qualifier != "" {
   451  		_, last, _ := cutLast(parts.Path, "/")
   452  		if last != "" && last != parts.Qualifier {
   453  			needQualifier = true
   454  		}
   455  	}
   456  	if parts.Version == "" && !needQualifier {
   457  		// Fast path.
   458  		return parts.Path
   459  	}
   460  	var buf strings.Builder
   461  	buf.WriteString(parts.Path)
   462  	if parts.Version != "" {
   463  		buf.WriteByte('@')
   464  		buf.WriteString(parts.Version)
   465  	}
   466  	if needQualifier {
   467  		buf.WriteByte(':')
   468  		buf.WriteString(parts.Qualifier)
   469  	}
   470  	return buf.String()
   471  }
   472  
   473  // ParseImportPath returns the various components of an import path.
   474  // It does not check the result for validity.
   475  func ParseImportPath(p string) ImportPath {
   476  	var parts ImportPath
   477  	pathWithoutQualifier := p
   478  	if i := strings.LastIndexAny(p, "/:"); i >= 0 && p[i] == ':' {
   479  		pathWithoutQualifier = p[:i]
   480  		parts.Qualifier = p[i+1:]
   481  		parts.ExplicitQualifier = true
   482  	}
   483  	parts.Path = pathWithoutQualifier
   484  	if path, version, ok := SplitPathVersion(pathWithoutQualifier); ok {
   485  		parts.Version = version
   486  		parts.Path = path
   487  	}
   488  	if !parts.ExplicitQualifier {
   489  		if i := strings.LastIndex(parts.Path, "/"); i >= 0 {
   490  			parts.Qualifier = parts.Path[i+1:]
   491  		} else {
   492  			parts.Qualifier = parts.Path
   493  		}
   494  		if !ast.IsValidIdent(parts.Qualifier) || strings.HasPrefix(parts.Qualifier, "#") || parts.Qualifier == "_" {
   495  			parts.Qualifier = ""
   496  		}
   497  	}
   498  	return parts
   499  }
   500  
   501  // CheckPathMajor returns a non-nil error if the semantic version v
   502  // does not match the path major version pathMajor.
   503  func CheckPathMajor(v, pathMajor string) error {
   504  	if m := semver.Major(v); m != pathMajor {
   505  		return &InvalidVersionError{
   506  			Version: v,
   507  			Err:     fmt.Errorf("should be %s, not %s", pathMajor, m),
   508  		}
   509  	}
   510  	return nil
   511  }
   512  
   513  func cutLast(s, sep string) (before, after string, found bool) {
   514  	if i := strings.LastIndex(s, sep); i >= 0 {
   515  		return s[:i], s[i+len(sep):], true
   516  	}
   517  	return "", s, false
   518  }