// Source: github.com/sirkon/goproxy@v1.4.8/internal/module/module.go

// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Package module defines the module.Version type
// along with support code.
package module

// IMPORTANT NOTE
//
// This file essentially defines the set of valid import paths for the go command.
// There are many subtle considerations, including Unicode ambiguity,
// security, network, and file system representations.
//
// This file also defines the set of valid module path and version combinations,
// another topic with many subtle considerations.
//
// Changes to the semantics in this file require approval from rsc.

import (
	"fmt"
	"sort"
	"strings"
	"unicode"
	"unicode/utf8"

	"github.com/sirkon/goproxy/internal/semver"
)

// A Version is defined by a module path and version pair.
type Version struct {
	// Path is the module path half of the pair.
	Path string

	// Version is usually a semantic version in canonical form.
	// There are two exceptions to this general rule.
	// First, the top-level target of a build has no specific version
	// and uses Version = "".
	// Second, during MVS calculations the version "none" is used
	// to represent the decision to take no version of a given module.
	//
	// The field is omitted from JSON output when it is empty.
	Version string `json:",omitempty"`
}

// Check checks that a given module path, version pair is valid.
// In addition to the path being a valid module path
// and the version being a valid semantic version,
// the two must correspond.
// For example, the path "yaml/v2" only corresponds to
// semantic versions beginning with "v2.".
49 func Check(path, version string) error { 50 if err := CheckPath(path); err != nil { 51 return err 52 } 53 if !semver.IsValid(version) { 54 return fmt.Errorf("malformed semantic version %v", version) 55 } 56 _, pathMajor, _ := SplitPathVersion(path) 57 if !MatchPathMajor(version, pathMajor) { 58 if pathMajor == "" { 59 pathMajor = "v0 or v1" 60 } 61 if pathMajor[0] == '.' { // .v1 62 pathMajor = pathMajor[1:] 63 } 64 return fmt.Errorf("mismatched module path %v and version %v (want %v)", path, version, pathMajor) 65 } 66 return nil 67 } 68 69 // firstPathOK reports whether r can appear in the first element of a module path. 70 // The first element of the path must be an LDH domain name, at least for now. 71 // To avoid case ambiguity, the domain name must be entirely lower case. 72 func firstPathOK(r rune) bool { 73 return r == '-' || r == '.' || 74 '0' <= r && r <= '9' || 75 'a' <= r && r <= 'z' 76 } 77 78 // pathOK reports whether r can appear in an import path element. 79 // Paths can be ASCII letters, ASCII digits, and limited ASCII punctuation: + - . _ and ~. 80 // This matches what "go get" has historically recognized in import paths. 81 // TODO(rsc): We would like to allow Unicode letters, but that requires additional 82 // care in the safe encoding (see note below). 83 func pathOK(r rune) bool { 84 if r < utf8.RuneSelf { 85 return r == '+' || r == '-' || r == '.' || r == '_' || r == '~' || 86 '0' <= r && r <= '9' || 87 'A' <= r && r <= 'Z' || 88 'a' <= r && r <= 'z' 89 } 90 return false 91 } 92 93 // fileNameOK reports whether r can appear in a file name. 94 // For now we allow all Unicode letters but otherwise limit to pathOK plus a few more punctuation characters. 95 // If we expand the set of allowed characters here, we have to 96 // work harder at detecting potential case-folding and normalization collisions. 97 // See note about "safe encoding" below. 
func fileNameOK(r rune) bool {
	// Outside ASCII only letters are accepted.
	// It may be OK to add more ASCII punctuation below, but only carefully.
	// For example Windows disallows < > \, and macOS disallows :, so we must not allow those.
	if r >= utf8.RuneSelf {
		return unicode.IsLetter(r)
	}

	// ASCII letters and digits are always fine.
	switch {
	case '0' <= r && r <= '9', 'A' <= r && r <= 'Z', 'a' <= r && r <= 'z':
		return true
	}

	// Entire set of ASCII punctuation, from which we remove characters:
	//     ! " # $ % & ' ( ) * + , - . / : ; < = > ? @ [ \ ] ^ _ ` { | } ~
	// We disallow some shell special characters: " ' * < > ? ` |
	// (Note that some of those are disallowed by the Windows file system as well.)
	// We also disallow path separators / : and \ (fileNameOK is only called on path element characters).
	// We allow spaces (U+0020) in file names.
	const allowedPunct = "!#$%&()+,-.=@[]^_{}~ "
	for _, ok := range allowedPunct {
		if ok == r {
			return true
		}
	}
	return false
}

// CheckPath checks that a module path is valid.
123 func CheckPath(path string) error { 124 if err := checkPath(path, false); err != nil { 125 return fmt.Errorf("malformed module path %q: %v", path, err) 126 } 127 i := strings.Index(path, "/") 128 if i < 0 { 129 i = len(path) 130 } 131 if i == 0 { 132 return fmt.Errorf("malformed module path %q: leading slash", path) 133 } 134 if !strings.Contains(path[:i], ".") { 135 return fmt.Errorf("malformed module path %q: missing dot in first path element", path) 136 } 137 if path[0] == '-' { 138 return fmt.Errorf("malformed module path %q: leading dash in first path element", path) 139 } 140 for _, r := range path[:i] { 141 if !firstPathOK(r) { 142 return fmt.Errorf("malformed module path %q: invalid char %q in first path element", path, r) 143 } 144 } 145 if _, _, ok := SplitPathVersion(path); !ok { 146 return fmt.Errorf("malformed module path %q: invalid version", path) 147 } 148 return nil 149 } 150 151 // CheckImportPath checks that an import path is valid. 152 func CheckImportPath(path string) error { 153 if err := checkPath(path, false); err != nil { 154 return fmt.Errorf("malformed import path %q: %v", path, err) 155 } 156 return nil 157 } 158 159 // checkPath checks that a general path is valid. 160 // It returns an error describing why but not mentioning path. 161 // Because these checks apply to both module paths and import paths, 162 // the caller is expected to add the "malformed ___ path %q: " prefix. 163 // fileName indicates whether the final element of the path is a file name 164 // (as opposed to a directory name). 
165 func checkPath(path string, fileName bool) error { 166 if !utf8.ValidString(path) { 167 return fmt.Errorf("invalid UTF-8") 168 } 169 if path == "" { 170 return fmt.Errorf("empty string") 171 } 172 if strings.Contains(path, "..") { 173 return fmt.Errorf("double dot") 174 } 175 if strings.Contains(path, "//") { 176 return fmt.Errorf("double slash") 177 } 178 if path[len(path)-1] == '/' { 179 return fmt.Errorf("trailing slash") 180 } 181 elemStart := 0 182 for i, r := range path { 183 if r == '/' { 184 if err := checkElem(path[elemStart:i], fileName); err != nil { 185 return err 186 } 187 elemStart = i + 1 188 } 189 } 190 if err := checkElem(path[elemStart:], fileName); err != nil { 191 return err 192 } 193 return nil 194 } 195 196 // checkElem checks whether an individual path element is valid. 197 // fileName indicates whether the element is a file name (not a directory name). 198 func checkElem(elem string, fileName bool) error { 199 if elem == "" { 200 return fmt.Errorf("empty path element") 201 } 202 if strings.Count(elem, ".") == len(elem) { 203 return fmt.Errorf("invalid path element %q", elem) 204 } 205 if elem[0] == '.' && !fileName { 206 return fmt.Errorf("leading dot in path element") 207 } 208 if elem[len(elem)-1] == '.' { 209 return fmt.Errorf("trailing dot in path element") 210 } 211 charOK := pathOK 212 if fileName { 213 charOK = fileNameOK 214 } 215 for _, r := range elem { 216 if !charOK(r) { 217 return fmt.Errorf("invalid char %q", r) 218 } 219 } 220 221 // Windows disallows a bunch of path elements, sadly. 222 // See https://docs.microsoft.com/en-us/windows/desktop/fileio/naming-a-file 223 short := elem 224 if i := strings.Index(short, "."); i >= 0 { 225 short = short[:i] 226 } 227 for _, bad := range badWindowsNames { 228 if strings.EqualFold(bad, short) { 229 return fmt.Errorf("disallowed path element %q", elem) 230 } 231 } 232 return nil 233 } 234 235 // CheckFilePath checks whether a slash-separated file path is valid. 
236 func CheckFilePath(path string) error { 237 if err := checkPath(path, true); err != nil { 238 return fmt.Errorf("malformed file path %q: %v", path, err) 239 } 240 return nil 241 } 242 243 // badWindowsNames are the reserved file path elements on Windows. 244 // See https://docs.microsoft.com/en-us/windows/desktop/fileio/naming-a-file 245 var badWindowsNames = []string{ 246 "CON", 247 "PRN", 248 "AUX", 249 "NUL", 250 "COM1", 251 "COM2", 252 "COM3", 253 "COM4", 254 "COM5", 255 "COM6", 256 "COM7", 257 "COM8", 258 "COM9", 259 "LPT1", 260 "LPT2", 261 "LPT3", 262 "LPT4", 263 "LPT5", 264 "LPT6", 265 "LPT7", 266 "LPT8", 267 "LPT9", 268 } 269 270 // SplitPathVersion returns prefix and major version such that prefix+pathMajor == path 271 // and version is either empty or "/vN" for N >= 2. 272 // As a special case, gopkg.in paths are recognized directly; 273 // they require ".vN" instead of "/vN", and for all N, not just N >= 2. 274 func SplitPathVersion(path string) (prefix, pathMajor string, ok bool) { 275 if strings.HasPrefix(path, "gopkg.in/") { 276 return splitGopkgIn(path) 277 } 278 279 i := len(path) 280 dot := false 281 for i > 0 && ('0' <= path[i-1] && path[i-1] <= '9' || path[i-1] == '.') { 282 if path[i-1] == '.' { 283 dot = true 284 } 285 i-- 286 } 287 if i <= 1 || path[i-1] != 'v' || path[i-2] != '/' { 288 return path, "", true 289 } 290 prefix, pathMajor = path[:i-2], path[i-2:] 291 if dot || len(pathMajor) <= 2 || pathMajor[2] == '0' || pathMajor == "/v1" { 292 return path, "", false 293 } 294 return prefix, pathMajor, true 295 } 296 297 // splitGopkgIn is like SplitPathVersion but only for gopkg.in paths. 
298 func splitGopkgIn(path string) (prefix, pathMajor string, ok bool) { 299 if !strings.HasPrefix(path, "gopkg.in/") { 300 return path, "", false 301 } 302 i := len(path) 303 if strings.HasSuffix(path, "-unstable") { 304 i -= len("-unstable") 305 } 306 for i > 0 && ('0' <= path[i-1] && path[i-1] <= '9') { 307 i-- 308 } 309 if i <= 1 || path[i-1] != 'v' || path[i-2] != '.' { 310 // All gopkg.in paths must end in vN for some N. 311 return path, "", false 312 } 313 prefix, pathMajor = path[:i-2], path[i-2:] 314 if len(pathMajor) <= 2 || pathMajor[2] == '0' && pathMajor != ".v0" { 315 return path, "", false 316 } 317 return prefix, pathMajor, true 318 } 319 320 // MatchPathMajor reports whether the semantic version v 321 // matches the path major version pathMajor. 322 func MatchPathMajor(v, pathMajor string) bool { 323 if strings.HasPrefix(pathMajor, ".v") && strings.HasSuffix(pathMajor, "-unstable") { 324 pathMajor = strings.TrimSuffix(pathMajor, "-unstable") 325 } 326 if strings.HasPrefix(v, "v0.0.0-") && pathMajor == ".v1" { 327 // Allow old bug in pseudo-versions that generated v0.0.0- pseudoversion for gopkg .v1. 328 // For example, gopkg.in/yaml.v2@v2.2.1's go.mod requires gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405. 329 return true 330 } 331 m := semver.Major(v) 332 if pathMajor == "" { 333 return m == "v0" || m == "v1" || semver.Build(v) == "+incompatible" 334 } 335 return (pathMajor[0] == '/' || pathMajor[0] == '.') && m == pathMajor[1:] 336 } 337 338 // CanonicalVersion returns the canonical form of the version string v. 339 // It is the same as semver.Canonical(v) except that it preserves the special build suffix "+incompatible". 340 func CanonicalVersion(v string) string { 341 cv := semver.Canonical(v) 342 if semver.Build(v) == "+incompatible" { 343 cv += "+incompatible" 344 } 345 return cv 346 } 347 348 // Sort sorts the list by Path, breaking ties by comparing Versions. 
349 func Sort(list []Version) { 350 sort.Slice(list, func(i, j int) bool { 351 mi := list[i] 352 mj := list[j] 353 if mi.Path != mj.Path { 354 return mi.Path < mj.Path 355 } 356 // To help go.sum formatting, allow version/file. 357 // Compare semver prefix by semver rules, 358 // file by string order. 359 vi := mi.Version 360 vj := mj.Version 361 var fi, fj string 362 if k := strings.Index(vi, "/"); k >= 0 { 363 vi, fi = vi[:k], vi[k:] 364 } 365 if k := strings.Index(vj, "/"); k >= 0 { 366 vj, fj = vj[:k], vj[k:] 367 } 368 if vi != vj { 369 return semver.Compare(vi, vj) < 0 370 } 371 return fi < fj 372 }) 373 } 374 375 // Safe encodings 376 // 377 // Module paths appear as substrings of file system paths 378 // (in the download cache) and of web server URLs in the proxy protocol. 379 // In general we cannot rely on file systems to be case-sensitive, 380 // nor can we rely on web servers, since they read from file systems. 381 // That is, we cannot rely on the file system to keep rsc.io/QUOTE 382 // and rsc.io/quote separate. Windows and macOS don't. 383 // Instead, we must never require two different casings of a file path. 384 // Because we want the download cache to match the proxy protocol, 385 // and because we want the proxy protocol to be possible to serve 386 // from a tree of static files (which might be stored on a case-insensitive 387 // file system), the proxy protocol must never require two different casings 388 // of a URL path either. 389 // 390 // One possibility would be to make the safe encoding be the lowercase 391 // hexadecimal encoding of the actual path bytes. This would avoid ever 392 // needing different casings of a file path, but it would be fairly illegible 393 // to most programmers when those paths appeared in the file system 394 // (including in file paths in compiler errors and stack traces) 395 // in web server logs, and so on. Instead, we want a safe encoding that 396 // leaves most paths unaltered. 
//
// The safe encoding is this:
// replace every uppercase letter with an exclamation mark
// followed by the letter's lowercase equivalent.
//
// For example,
// github.com/Azure/azure-sdk-for-go -> github.com/!azure/azure-sdk-for-go.
// github.com/GoogleCloudPlatform/cloudsql-proxy -> github.com/!google!cloud!platform/cloudsql-proxy
// github.com/Sirupsen/logrus -> github.com/!sirupsen/logrus.
//
// Import paths that avoid upper-case letters are left unchanged.
// Note that because import paths are ASCII-only and avoid various
// problematic punctuation (like : < and >), the safe encoding is also ASCII-only
// and avoids the same problematic punctuation.
//
// Import paths have never allowed exclamation marks, so there is no
// need to define how to encode a literal !.
//
// Although paths are disallowed from using Unicode (see pathOK above),
// the eventual plan is to allow Unicode letters as well, to assume that
// file systems and URLs are Unicode-safe (storing UTF-8), and apply
// the !-for-uppercase convention. Note however that not all runes that
// are different but case-fold equivalent are an upper/lower pair.
// For example, U+004B ('K'), U+006B ('k'), and U+212A ('K' for Kelvin)
// are considered to case-fold to each other. When we do add Unicode
// letters, we must not assume that upper/lower are the only case-equivalent pairs.
// Perhaps the Kelvin symbol would be disallowed entirely, for example.
// Or perhaps it would encode as "!!k", or perhaps as "(212A)".
//
// Also, it would be nice to allow Unicode marks as well as letters,
// but marks include combining marks, and then we must deal not
// only with case folding but also normalization: both U+00E9 ('é')
// and U+0065 U+0301 ('e' followed by combining acute accent)
// look the same on the page and are treated by some file systems
// as the same path.
If we do allow Unicode marks in paths, there 432 // must be some kind of normalization to allow only one canonical 433 // encoding of any character used in an import path. 434 435 // EncodePath returns the safe encoding of the given module path. 436 // It fails if the module path is invalid. 437 func EncodePath(path string) (encoding string, err error) { 438 if err := CheckPath(path); err != nil { 439 return "", err 440 } 441 442 return encodeString(path) 443 } 444 445 // EncodeVersion returns the safe encoding of the given module version. 446 // Versions are allowed to be in non-semver form but must be valid file names 447 // and not contain exclamation marks. 448 func EncodeVersion(v string) (encoding string, err error) { 449 if err := checkElem(v, true); err != nil || strings.Contains(v, "!") { 450 return "", fmt.Errorf("disallowed version string %q", v) 451 } 452 return encodeString(v) 453 } 454 455 func encodeString(s string) (encoding string, err error) { 456 haveUpper := false 457 for _, r := range s { 458 if r == '!' || r >= utf8.RuneSelf { 459 // This should be disallowed by CheckPath, but diagnose anyway. 460 // The correctness of the encoding loop below depends on it. 461 return "", fmt.Errorf("internal error: inconsistency in EncodePath") 462 } 463 if 'A' <= r && r <= 'Z' { 464 haveUpper = true 465 } 466 } 467 468 if !haveUpper { 469 return s, nil 470 } 471 472 var buf []byte 473 for _, r := range s { 474 if 'A' <= r && r <= 'Z' { 475 buf = append(buf, '!', byte(r+'a'-'A')) 476 } else { 477 buf = append(buf, byte(r)) 478 } 479 } 480 return string(buf), nil 481 } 482 483 // DecodePath returns the module path of the given safe encoding. 484 // It fails if the encoding is invalid or encodes an invalid path. 
485 func DecodePath(encoding string) (path string, err error) { 486 path, ok := decodeString(encoding) 487 if !ok { 488 return "", fmt.Errorf("invalid module path encoding %q", encoding) 489 } 490 if err := CheckPath(path); err != nil { 491 return "", fmt.Errorf("invalid module path encoding %q: %v", encoding, err) 492 } 493 return path, nil 494 } 495 496 // DecodeVersion returns the version string for the given safe encoding. 497 // It fails if the encoding is invalid or encodes an invalid version. 498 // Versions are allowed to be in non-semver form but must be valid file names 499 // and not contain exclamation marks. 500 func DecodeVersion(encoding string) (v string, err error) { 501 v, ok := decodeString(encoding) 502 if !ok { 503 return "", fmt.Errorf("invalid version encoding %q", encoding) 504 } 505 if err := checkElem(v, true); err != nil { 506 return "", fmt.Errorf("disallowed version string %q", v) 507 } 508 return v, nil 509 } 510 511 func decodeString(encoding string) (string, bool) { 512 var buf []byte 513 514 bang := false 515 for _, r := range encoding { 516 if r >= utf8.RuneSelf { 517 return "", false 518 } 519 if bang { 520 bang = false 521 if r < 'a' || 'z' < r { 522 return "", false 523 } 524 buf = append(buf, byte(r+'A'-'a')) 525 continue 526 } 527 if r == '!' { 528 bang = true 529 continue 530 } 531 if 'A' <= r && r <= 'Z' { 532 return "", false 533 } 534 buf = append(buf, byte(r)) 535 } 536 if bang { 537 return "", false 538 } 539 return string(buf), true 540 }