github.com/mdaxf/iac@v0.0.0-20240519030858-58a061660378/vendor_skip/golang.org/x/net/idna/idna9.0.0.go (about) 1 // Code generated by running "go generate" in golang.org/x/text. DO NOT EDIT. 2 3 // Copyright 2016 The Go Authors. All rights reserved. 4 // Use of this source code is governed by a BSD-style 5 // license that can be found in the LICENSE file. 6 7 //go:build !go1.10 8 9 // Package idna implements IDNA2008 using the compatibility processing 10 // defined by UTS (Unicode Technical Standard) #46, which defines a standard to 11 // deal with the transition from IDNA2003. 12 // 13 // IDNA2008 (Internationalized Domain Names for Applications), is defined in RFC 14 // 5890, RFC 5891, RFC 5892, RFC 5893 and RFC 5894. 15 // UTS #46 is defined in https://www.unicode.org/reports/tr46. 16 // See https://unicode.org/cldr/utility/idna.jsp for a visualization of the 17 // differences between these two standards. 18 package idna // import "golang.org/x/net/idna" 19 20 import ( 21 "fmt" 22 "strings" 23 "unicode/utf8" 24 25 "golang.org/x/text/secure/bidirule" 26 "golang.org/x/text/unicode/norm" 27 ) 28 29 // NOTE: Unlike common practice in Go APIs, the functions will return a 30 // sanitized domain name in case of errors. Browsers sometimes use a partially 31 // evaluated string as lookup. 32 // TODO: the current error handling is, in my opinion, the least opinionated. 33 // Other strategies are also viable, though: 34 // Option 1) Return an empty string in case of error, but allow the user to 35 // specify explicitly which errors to ignore. 36 // Option 2) Return the partially evaluated string if it is itself a valid 37 // string, otherwise return the empty string in case of error. 38 // Option 3) Option 1 and 2. 39 // Option 4) Always return an empty string for now and implement Option 1 as 40 // needed, and document that the return string may not be empty in case of 41 // error in the future. 42 // I think Option 1 is best, but it is quite opinionated. 
43 44 // ToASCII is a wrapper for Punycode.ToASCII. 45 func ToASCII(s string) (string, error) { 46 return Punycode.process(s, true) 47 } 48 49 // ToUnicode is a wrapper for Punycode.ToUnicode. 50 func ToUnicode(s string) (string, error) { 51 return Punycode.process(s, false) 52 } 53 54 // An Option configures a Profile at creation time. 55 type Option func(*options) 56 57 // Transitional sets a Profile to use the Transitional mapping as defined in UTS 58 // #46. This will cause, for example, "ß" to be mapped to "ss". Using the 59 // transitional mapping provides a compromise between IDNA2003 and IDNA2008 60 // compatibility. It is used by some browsers when resolving domain names. This 61 // option is only meaningful if combined with MapForLookup. 62 func Transitional(transitional bool) Option { 63 return func(o *options) { o.transitional = transitional } 64 } 65 66 // VerifyDNSLength sets whether a Profile should fail if any of the IDN parts 67 // are longer than allowed by the RFC. 68 // 69 // This option corresponds to the VerifyDnsLength flag in UTS #46. 70 func VerifyDNSLength(verify bool) Option { 71 return func(o *options) { o.verifyDNSLength = verify } 72 } 73 74 // RemoveLeadingDots removes leading label separators. Leading runes that map to 75 // dots, such as U+3002 IDEOGRAPHIC FULL STOP, are removed as well. 76 func RemoveLeadingDots(remove bool) Option { 77 return func(o *options) { o.removeLeadingDots = remove } 78 } 79 80 // ValidateLabels sets whether to check the mandatory label validation criteria 81 // as defined in Section 5.4 of RFC 5891. This includes testing for correct use 82 // of hyphens ('-'), normalization, validity of runes, and the context rules. 83 // In particular, ValidateLabels also sets the CheckHyphens and CheckJoiners flags 84 // in UTS #46. 85 func ValidateLabels(enable bool) Option { 86 return func(o *options) { 87 // Don't override existing mappings, but set one that at least checks 88 // normalization if it is not set. 
89 if o.mapping == nil && enable { 90 o.mapping = normalize 91 } 92 o.trie = trie 93 o.checkJoiners = enable 94 o.checkHyphens = enable 95 if enable { 96 o.fromPuny = validateFromPunycode 97 } else { 98 o.fromPuny = nil 99 } 100 } 101 } 102 103 // CheckHyphens sets whether to check for correct use of hyphens ('-') in 104 // labels. Most web browsers do not have this option set, since labels such as 105 // "r3---sn-apo3qvuoxuxbt-j5pe" are in common use. 106 // 107 // This option corresponds to the CheckHyphens flag in UTS #46. 108 func CheckHyphens(enable bool) Option { 109 return func(o *options) { o.checkHyphens = enable } 110 } 111 112 // CheckJoiners sets whether to check the ContextJ rules as defined in Appendix 113 // A of RFC 5892, concerning the use of joiner runes. 114 // 115 // This option corresponds to the CheckJoiners flag in UTS #46. 116 func CheckJoiners(enable bool) Option { 117 return func(o *options) { 118 o.trie = trie 119 o.checkJoiners = enable 120 } 121 } 122 123 // StrictDomainName limits the set of permissible ASCII characters to those 124 // allowed in domain names as defined in RFC 1034 (A-Z, a-z, 0-9 and the 125 // hyphen). This is set by default for MapForLookup and ValidateForRegistration, 126 // but is only useful if ValidateLabels is set. 127 // 128 // This option is useful, for instance, for browsers that allow characters 129 // outside this range, for example a '_' (U+005F LOW LINE). See 130 // http://www.rfc-editor.org/std/std3.txt for more details. 131 // 132 // This option corresponds to the UseSTD3ASCIIRules flag in UTS #46. 133 func StrictDomainName(use bool) Option { 134 return func(o *options) { o.useSTD3Rules = use } 135 } 136 137 // NOTE: the following options pull in tables. The tables should not be linked 138 // in as long as the options are not used. 139 140 // BidiRule enables the Bidi rule as defined in RFC 5893. Any application 141 // that relies on proper validation of labels should include this rule. 
142 // 143 // This option corresponds to the CheckBidi flag in UTS #46. 144 func BidiRule() Option { 145 return func(o *options) { o.bidirule = bidirule.ValidString } 146 } 147 148 // ValidateForRegistration sets validation options to verify that a given IDN is 149 // properly formatted for registration as defined by Section 4 of RFC 5891. 150 func ValidateForRegistration() Option { 151 return func(o *options) { 152 o.mapping = validateRegistration 153 StrictDomainName(true)(o) 154 ValidateLabels(true)(o) 155 VerifyDNSLength(true)(o) 156 BidiRule()(o) 157 } 158 } 159 160 // MapForLookup sets validation and mapping options such that a given IDN is 161 // transformed for domain name lookup according to the requirements set out in 162 // Section 5 of RFC 5891. The mappings follow the recommendations of RFC 5894, 163 // RFC 5895 and UTS 46. It does not add the Bidi Rule. Use the BidiRule option 164 // to add this check. 165 // 166 // The mappings include normalization and mapping case, width and other 167 // compatibility mappings. 168 func MapForLookup() Option { 169 return func(o *options) { 170 o.mapping = validateAndMap 171 StrictDomainName(true)(o) 172 ValidateLabels(true)(o) 173 RemoveLeadingDots(true)(o) 174 } 175 } 176 177 type options struct { 178 transitional bool 179 useSTD3Rules bool 180 checkHyphens bool 181 checkJoiners bool 182 verifyDNSLength bool 183 removeLeadingDots bool 184 185 trie *idnaTrie 186 187 // fromPuny calls validation rules when converting A-labels to U-labels. 188 fromPuny func(p *Profile, s string) error 189 190 // mapping implements a validation and mapping step as defined in RFC 5895 191 // or UTS 46, tailored to, for example, domain registration or lookup. 192 mapping func(p *Profile, s string) (string, error) 193 194 // bidirule, if specified, checks whether s conforms to the Bidi Rule 195 // defined in RFC 5893. 196 bidirule func(s string) bool 197 } 198 199 // A Profile defines the configuration of a IDNA mapper. 
200 type Profile struct { 201 options 202 } 203 204 func apply(o *options, opts []Option) { 205 for _, f := range opts { 206 f(o) 207 } 208 } 209 210 // New creates a new Profile. 211 // 212 // With no options, the returned Profile is the most permissive and equals the 213 // Punycode Profile. Options can be passed to further restrict the Profile. The 214 // MapForLookup and ValidateForRegistration options set a collection of options, 215 // for lookup and registration purposes respectively, which can be tailored by 216 // adding more fine-grained options, where later options override earlier 217 // options. 218 func New(o ...Option) *Profile { 219 p := &Profile{} 220 apply(&p.options, o) 221 return p 222 } 223 224 // ToASCII converts a domain or domain label to its ASCII form. For example, 225 // ToASCII("bücher.example.com") is "xn--bcher-kva.example.com", and 226 // ToASCII("golang") is "golang". If an error is encountered it will return 227 // an error and a (partially) processed result. 228 func (p *Profile) ToASCII(s string) (string, error) { 229 return p.process(s, true) 230 } 231 232 // ToUnicode converts a domain or domain label to its Unicode form. For example, 233 // ToUnicode("xn--bcher-kva.example.com") is "bücher.example.com", and 234 // ToUnicode("golang") is "golang". If an error is encountered it will return 235 // an error and a (partially) processed result. 236 func (p *Profile) ToUnicode(s string) (string, error) { 237 pp := *p 238 pp.transitional = false 239 return pp.process(s, false) 240 } 241 242 // String reports a string with a description of the profile for debugging 243 // purposes. The string format may change with different versions. 
244 func (p *Profile) String() string { 245 s := "" 246 if p.transitional { 247 s = "Transitional" 248 } else { 249 s = "NonTransitional" 250 } 251 if p.useSTD3Rules { 252 s += ":UseSTD3Rules" 253 } 254 if p.checkHyphens { 255 s += ":CheckHyphens" 256 } 257 if p.checkJoiners { 258 s += ":CheckJoiners" 259 } 260 if p.verifyDNSLength { 261 s += ":VerifyDNSLength" 262 } 263 return s 264 } 265 266 var ( 267 // Punycode is a Profile that does raw punycode processing with a minimum 268 // of validation. 269 Punycode *Profile = punycode 270 271 // Lookup is the recommended profile for looking up domain names, according 272 // to Section 5 of RFC 5891. The exact configuration of this profile may 273 // change over time. 274 Lookup *Profile = lookup 275 276 // Display is the recommended profile for displaying domain names. 277 // The configuration of this profile may change over time. 278 Display *Profile = display 279 280 // Registration is the recommended profile for checking whether a given 281 // IDN is valid for registration, according to Section 4 of RFC 5891. 
282 Registration *Profile = registration 283 284 punycode = &Profile{} 285 lookup = &Profile{options{ 286 transitional: true, 287 removeLeadingDots: true, 288 useSTD3Rules: true, 289 checkHyphens: true, 290 checkJoiners: true, 291 trie: trie, 292 fromPuny: validateFromPunycode, 293 mapping: validateAndMap, 294 bidirule: bidirule.ValidString, 295 }} 296 display = &Profile{options{ 297 useSTD3Rules: true, 298 removeLeadingDots: true, 299 checkHyphens: true, 300 checkJoiners: true, 301 trie: trie, 302 fromPuny: validateFromPunycode, 303 mapping: validateAndMap, 304 bidirule: bidirule.ValidString, 305 }} 306 registration = &Profile{options{ 307 useSTD3Rules: true, 308 verifyDNSLength: true, 309 checkHyphens: true, 310 checkJoiners: true, 311 trie: trie, 312 fromPuny: validateFromPunycode, 313 mapping: validateRegistration, 314 bidirule: bidirule.ValidString, 315 }} 316 317 // TODO: profiles 318 // Register: recommended for approving domain names: don't do any mappings 319 // but rather reject on invalid input. Bundle or block deviation characters. 320 ) 321 322 type labelError struct{ label, code_ string } 323 324 func (e labelError) code() string { return e.code_ } 325 func (e labelError) Error() string { 326 return fmt.Sprintf("idna: invalid label %q", e.label) 327 } 328 329 type runeError rune 330 331 func (e runeError) code() string { return "P1" } 332 func (e runeError) Error() string { 333 return fmt.Sprintf("idna: disallowed rune %U", e) 334 } 335 336 // process implements the algorithm described in section 4 of UTS #46, 337 // see https://www.unicode.org/reports/tr46. 338 func (p *Profile) process(s string, toASCII bool) (string, error) { 339 var err error 340 if p.mapping != nil { 341 s, err = p.mapping(p, s) 342 } 343 // Remove leading empty labels. 
344 if p.removeLeadingDots { 345 for ; len(s) > 0 && s[0] == '.'; s = s[1:] { 346 } 347 } 348 // It seems like we should only create this error on ToASCII, but the 349 // UTS 46 conformance tests suggests we should always check this. 350 if err == nil && p.verifyDNSLength && s == "" { 351 err = &labelError{s, "A4"} 352 } 353 labels := labelIter{orig: s} 354 for ; !labels.done(); labels.next() { 355 label := labels.label() 356 if label == "" { 357 // Empty labels are not okay. The label iterator skips the last 358 // label if it is empty. 359 if err == nil && p.verifyDNSLength { 360 err = &labelError{s, "A4"} 361 } 362 continue 363 } 364 if strings.HasPrefix(label, acePrefix) { 365 u, err2 := decode(label[len(acePrefix):]) 366 if err2 != nil { 367 if err == nil { 368 err = err2 369 } 370 // Spec says keep the old label. 371 continue 372 } 373 labels.set(u) 374 if err == nil && p.fromPuny != nil { 375 err = p.fromPuny(p, u) 376 } 377 if err == nil { 378 // This should be called on NonTransitional, according to the 379 // spec, but that currently does not have any effect. Use the 380 // original profile to preserve options. 381 err = p.validateLabel(u) 382 } 383 } else if err == nil { 384 err = p.validateLabel(label) 385 } 386 } 387 if toASCII { 388 for labels.reset(); !labels.done(); labels.next() { 389 label := labels.label() 390 if !ascii(label) { 391 a, err2 := encode(acePrefix, label) 392 if err == nil { 393 err = err2 394 } 395 label = a 396 labels.set(a) 397 } 398 n := len(label) 399 if p.verifyDNSLength && err == nil && (n == 0 || n > 63) { 400 err = &labelError{label, "A4"} 401 } 402 } 403 } 404 s = labels.result() 405 if toASCII && p.verifyDNSLength && err == nil { 406 // Compute the length of the domain name minus the root label and its dot. 407 n := len(s) 408 if n > 0 && s[n-1] == '.' 
{ 409 n-- 410 } 411 if len(s) < 1 || n > 253 { 412 err = &labelError{s, "A4"} 413 } 414 } 415 return s, err 416 } 417 418 func normalize(p *Profile, s string) (string, error) { 419 return norm.NFC.String(s), nil 420 } 421 422 func validateRegistration(p *Profile, s string) (string, error) { 423 if !norm.NFC.IsNormalString(s) { 424 return s, &labelError{s, "V1"} 425 } 426 for i := 0; i < len(s); { 427 v, sz := trie.lookupString(s[i:]) 428 // Copy bytes not copied so far. 429 switch p.simplify(info(v).category()) { 430 // TODO: handle the NV8 defined in the Unicode idna data set to allow 431 // for strict conformance to IDNA2008. 432 case valid, deviation: 433 case disallowed, mapped, unknown, ignored: 434 r, _ := utf8.DecodeRuneInString(s[i:]) 435 return s, runeError(r) 436 } 437 i += sz 438 } 439 return s, nil 440 } 441 442 func validateAndMap(p *Profile, s string) (string, error) { 443 var ( 444 err error 445 b []byte 446 k int 447 ) 448 for i := 0; i < len(s); { 449 v, sz := trie.lookupString(s[i:]) 450 start := i 451 i += sz 452 // Copy bytes not copied so far. 453 switch p.simplify(info(v).category()) { 454 case valid: 455 continue 456 case disallowed: 457 if err == nil { 458 r, _ := utf8.DecodeRuneInString(s[start:]) 459 err = runeError(r) 460 } 461 continue 462 case mapped, deviation: 463 b = append(b, s[k:start]...) 464 b = info(v).appendMapping(b, s[start:i]) 465 case ignored: 466 b = append(b, s[k:start]...) 467 // drop the rune 468 case unknown: 469 b = append(b, s[k:start]...) 470 b = append(b, "\ufffd"...) 471 } 472 k = i 473 } 474 if k == 0 { 475 // No changes so far. 476 s = norm.NFC.String(s) 477 } else { 478 b = append(b, s[k:]...) 479 if norm.NFC.QuickSpan(b) != len(b) { 480 b = norm.NFC.Bytes(b) 481 } 482 // TODO: the punycode converters require strings as input. 483 s = string(b) 484 } 485 return s, err 486 } 487 488 // A labelIter allows iterating over domain name labels. 
type labelIter struct {
	orig     string   // the domain name being iterated
	slice    []string // labels of orig; allocated by the first call to set
	curStart int      // start offset of the current label within orig
	curEnd   int      // end offset of the current label, updated by label
	i        int      // index of the current label
}

// reset rewinds the iterator to the first label. Labels replaced through set
// are kept.
func (l *labelIter) reset() {
	l.curStart = 0
	l.curEnd = 0
	l.i = 0
}

// done reports whether the iteration has moved past the last label.
func (l *labelIter) done() bool {
	return l.curStart >= len(l.orig)
}

// result returns the labels joined by dots if any label was replaced through
// set, and the original string otherwise.
func (l *labelIter) result() string {
	if l.slice != nil {
		return strings.Join(l.slice, ".")
	}
	return l.orig
}

// label returns the current label. While no label has been replaced it also
// records the end offset of the label, which next relies on, so label must be
// called before next in that mode.
func (l *labelIter) label() string {
	if l.slice != nil {
		return l.slice[l.i]
	}
	p := strings.IndexByte(l.orig[l.curStart:], '.')
	l.curEnd = l.curStart + p
	if p == -1 {
		// No further separator: the label runs to the end of the string.
		l.curEnd = len(l.orig)
	}
	return l.orig[l.curStart:l.curEnd]
}

// next sets the value to the next label. It skips the last label if it is empty.
func (l *labelIter) next() {
	l.i++
	if l.slice != nil {
		if l.i >= len(l.slice) || l.i == len(l.slice)-1 && l.slice[l.i] == "" {
			// Setting curStart past the end is what makes done report true.
			l.curStart = len(l.orig)
		}
	} else {
		l.curStart = l.curEnd + 1
		if l.curStart == len(l.orig)-1 && l.orig[l.curStart] == '.' {
			l.curStart = len(l.orig)
		}
	}
}

// set replaces the current label with s. The first call splits the original
// string into a slice of labels; label and result use that slice from then on.
func (l *labelIter) set(s string) {
	if l.slice == nil {
		l.slice = strings.Split(l.orig, ".")
	}
	l.slice[l.i] = s
}

// acePrefix is the ASCII Compatible Encoding prefix.
549 const acePrefix = "xn--" 550 551 func (p *Profile) simplify(cat category) category { 552 switch cat { 553 case disallowedSTD3Mapped: 554 if p.useSTD3Rules { 555 cat = disallowed 556 } else { 557 cat = mapped 558 } 559 case disallowedSTD3Valid: 560 if p.useSTD3Rules { 561 cat = disallowed 562 } else { 563 cat = valid 564 } 565 case deviation: 566 if !p.transitional { 567 cat = valid 568 } 569 case validNV8, validXV8: 570 // TODO: handle V2008 571 cat = valid 572 } 573 return cat 574 } 575 576 func validateFromPunycode(p *Profile, s string) error { 577 if !norm.NFC.IsNormalString(s) { 578 return &labelError{s, "V1"} 579 } 580 for i := 0; i < len(s); { 581 v, sz := trie.lookupString(s[i:]) 582 if c := p.simplify(info(v).category()); c != valid && c != deviation { 583 return &labelError{s, "V6"} 584 } 585 i += sz 586 } 587 return nil 588 } 589 590 const ( 591 zwnj = "\u200c" 592 zwj = "\u200d" 593 ) 594 595 type joinState int8 596 597 const ( 598 stateStart joinState = iota 599 stateVirama 600 stateBefore 601 stateBeforeVirama 602 stateAfter 603 stateFAIL 604 ) 605 606 var joinStates = [][numJoinTypes]joinState{ 607 stateStart: { 608 joiningL: stateBefore, 609 joiningD: stateBefore, 610 joinZWNJ: stateFAIL, 611 joinZWJ: stateFAIL, 612 joinVirama: stateVirama, 613 }, 614 stateVirama: { 615 joiningL: stateBefore, 616 joiningD: stateBefore, 617 }, 618 stateBefore: { 619 joiningL: stateBefore, 620 joiningD: stateBefore, 621 joiningT: stateBefore, 622 joinZWNJ: stateAfter, 623 joinZWJ: stateFAIL, 624 joinVirama: stateBeforeVirama, 625 }, 626 stateBeforeVirama: { 627 joiningL: stateBefore, 628 joiningD: stateBefore, 629 joiningT: stateBefore, 630 }, 631 stateAfter: { 632 joiningL: stateFAIL, 633 joiningD: stateBefore, 634 joiningT: stateAfter, 635 joiningR: stateStart, 636 joinZWNJ: stateFAIL, 637 joinZWJ: stateFAIL, 638 joinVirama: stateAfter, // no-op as we can't accept joiners here 639 }, 640 stateFAIL: { 641 0: stateFAIL, 642 joiningL: stateFAIL, 643 joiningD: 
stateFAIL, 644 joiningT: stateFAIL, 645 joiningR: stateFAIL, 646 joinZWNJ: stateFAIL, 647 joinZWJ: stateFAIL, 648 joinVirama: stateFAIL, 649 }, 650 } 651 652 // validateLabel validates the criteria from Section 4.1. Item 1, 4, and 6 are 653 // already implicitly satisfied by the overall implementation. 654 func (p *Profile) validateLabel(s string) error { 655 if s == "" { 656 if p.verifyDNSLength { 657 return &labelError{s, "A4"} 658 } 659 return nil 660 } 661 if p.bidirule != nil && !p.bidirule(s) { 662 return &labelError{s, "B"} 663 } 664 if p.checkHyphens { 665 if len(s) > 4 && s[2] == '-' && s[3] == '-' { 666 return &labelError{s, "V2"} 667 } 668 if s[0] == '-' || s[len(s)-1] == '-' { 669 return &labelError{s, "V3"} 670 } 671 } 672 if !p.checkJoiners { 673 return nil 674 } 675 trie := p.trie // p.checkJoiners is only set if trie is set. 676 // TODO: merge the use of this in the trie. 677 v, sz := trie.lookupString(s) 678 x := info(v) 679 if x.isModifier() { 680 return &labelError{s, "V5"} 681 } 682 // Quickly return in the absence of zero-width (non) joiners. 683 if strings.Index(s, zwj) == -1 && strings.Index(s, zwnj) == -1 { 684 return nil 685 } 686 st := stateStart 687 for i := 0; ; { 688 jt := x.joinType() 689 if s[i:i+sz] == zwj { 690 jt = joinZWJ 691 } else if s[i:i+sz] == zwnj { 692 jt = joinZWNJ 693 } 694 st = joinStates[st][jt] 695 if x.isViramaModifier() { 696 st = joinStates[st][joinVirama] 697 } 698 if i += sz; i == len(s) { 699 break 700 } 701 v, sz = trie.lookupString(s[i:]) 702 x = info(v) 703 } 704 if st == stateFAIL || st == stateAfter { 705 return &labelError{s, "C"} 706 } 707 return nil 708 } 709 710 func ascii(s string) bool { 711 for i := 0; i < len(s); i++ { 712 if s[i] >= utf8.RuneSelf { 713 return false 714 } 715 } 716 return true 717 }