github.com/binkynet/BinkyNet@v1.12.1-0.20240421190447-da4e34c20be0/proto_vendor/golang.org/x/net/idna/idna10.0.0.go (about) 1 // Code generated by running "go generate" in golang.org/x/text. DO NOT EDIT. 2 3 // Copyright 2016 The Go Authors. All rights reserved. 4 // Use of this source code is governed by a BSD-style 5 // license that can be found in the LICENSE file. 6 7 // +build go1.10 8 9 // Package idna implements IDNA2008 using the compatibility processing 10 // defined by UTS (Unicode Technical Standard) #46, which defines a standard to 11 // deal with the transition from IDNA2003. 12 // 13 // IDNA2008 (Internationalized Domain Names for Applications), is defined in RFC 14 // 5890, RFC 5891, RFC 5892, RFC 5893 and RFC 5894. 15 // UTS #46 is defined in https://www.unicode.org/reports/tr46. 16 // See https://unicode.org/cldr/utility/idna.jsp for a visualization of the 17 // differences between these two standards. 18 package idna // import "golang.org/x/net/idna" 19 20 import ( 21 "fmt" 22 "strings" 23 "unicode/utf8" 24 25 "golang.org/x/text/secure/bidirule" 26 "golang.org/x/text/unicode/bidi" 27 "golang.org/x/text/unicode/norm" 28 ) 29 30 // NOTE: Unlike common practice in Go APIs, the functions will return a 31 // sanitized domain name in case of errors. Browsers sometimes use a partially 32 // evaluated string as lookup. 33 // TODO: the current error handling is, in my opinion, the least opinionated. 34 // Other strategies are also viable, though: 35 // Option 1) Return an empty string in case of error, but allow the user to 36 // specify explicitly which errors to ignore. 37 // Option 2) Return the partially evaluated string if it is itself a valid 38 // string, otherwise return the empty string in case of error. 39 // Option 3) Option 1 and 2. 40 // Option 4) Always return an empty string for now and implement Option 1 as 41 // needed, and document that the return string may not be empty in case of 42 // error in the future. 
43 // I think Option 1 is best, but it is quite opinionated. 44 45 // ToASCII is a wrapper for Punycode.ToASCII. 46 func ToASCII(s string) (string, error) { 47 return Punycode.process(s, true) 48 } 49 50 // ToUnicode is a wrapper for Punycode.ToUnicode. 51 func ToUnicode(s string) (string, error) { 52 return Punycode.process(s, false) 53 } 54 55 // An Option configures a Profile at creation time. 56 type Option func(*options) 57 58 // Transitional sets a Profile to use the Transitional mapping as defined in UTS 59 // #46. This will cause, for example, "ß" to be mapped to "ss". Using the 60 // transitional mapping provides a compromise between IDNA2003 and IDNA2008 61 // compatibility. It is used by most browsers when resolving domain names. This 62 // option is only meaningful if combined with MapForLookup. 63 func Transitional(transitional bool) Option { 64 return func(o *options) { o.transitional = true } 65 } 66 67 // VerifyDNSLength sets whether a Profile should fail if any of the IDN parts 68 // are longer than allowed by the RFC. 69 func VerifyDNSLength(verify bool) Option { 70 return func(o *options) { o.verifyDNSLength = verify } 71 } 72 73 // RemoveLeadingDots removes leading label separators. Leading runes that map to 74 // dots, such as U+3002 IDEOGRAPHIC FULL STOP, are removed as well. 75 // 76 // This is the behavior suggested by the UTS #46 and is adopted by some 77 // browsers. 78 func RemoveLeadingDots(remove bool) Option { 79 return func(o *options) { o.removeLeadingDots = remove } 80 } 81 82 // ValidateLabels sets whether to check the mandatory label validation criteria 83 // as defined in Section 5.4 of RFC 5891. This includes testing for correct use 84 // of hyphens ('-'), normalization, validity of runes, and the context rules. 85 func ValidateLabels(enable bool) Option { 86 return func(o *options) { 87 // Don't override existing mappings, but set one that at least checks 88 // normalization if it is not set. 
89 if o.mapping == nil && enable { 90 o.mapping = normalize 91 } 92 o.trie = trie 93 o.validateLabels = enable 94 o.fromPuny = validateFromPunycode 95 } 96 } 97 98 // StrictDomainName limits the set of permissible ASCII characters to those 99 // allowed in domain names as defined in RFC 1034 (A-Z, a-z, 0-9 and the 100 // hyphen). This is set by default for MapForLookup and ValidateForRegistration. 101 // 102 // This option is useful, for instance, for browsers that allow characters 103 // outside this range, for example a '_' (U+005F LOW LINE). See 104 // http://www.rfc-editor.org/std/std3.txt for more details This option 105 // corresponds to the UseSTD3ASCIIRules option in UTS #46. 106 func StrictDomainName(use bool) Option { 107 return func(o *options) { 108 o.trie = trie 109 o.useSTD3Rules = use 110 o.fromPuny = validateFromPunycode 111 } 112 } 113 114 // NOTE: the following options pull in tables. The tables should not be linked 115 // in as long as the options are not used. 116 117 // BidiRule enables the Bidi rule as defined in RFC 5893. Any application 118 // that relies on proper validation of labels should include this rule. 119 func BidiRule() Option { 120 return func(o *options) { o.bidirule = bidirule.ValidString } 121 } 122 123 // ValidateForRegistration sets validation options to verify that a given IDN is 124 // properly formatted for registration as defined by Section 4 of RFC 5891. 125 func ValidateForRegistration() Option { 126 return func(o *options) { 127 o.mapping = validateRegistration 128 StrictDomainName(true)(o) 129 ValidateLabels(true)(o) 130 VerifyDNSLength(true)(o) 131 BidiRule()(o) 132 } 133 } 134 135 // MapForLookup sets validation and mapping options such that a given IDN is 136 // transformed for domain name lookup according to the requirements set out in 137 // Section 5 of RFC 5891. The mappings follow the recommendations of RFC 5894, 138 // RFC 5895 and UTS 46. It does not add the Bidi Rule. 
Use the BidiRule option 139 // to add this check. 140 // 141 // The mappings include normalization and mapping case, width and other 142 // compatibility mappings. 143 func MapForLookup() Option { 144 return func(o *options) { 145 o.mapping = validateAndMap 146 StrictDomainName(true)(o) 147 ValidateLabels(true)(o) 148 } 149 } 150 151 type options struct { 152 transitional bool 153 useSTD3Rules bool 154 validateLabels bool 155 verifyDNSLength bool 156 removeLeadingDots bool 157 158 trie *idnaTrie 159 160 // fromPuny calls validation rules when converting A-labels to U-labels. 161 fromPuny func(p *Profile, s string) error 162 163 // mapping implements a validation and mapping step as defined in RFC 5895 164 // or UTS 46, tailored to, for example, domain registration or lookup. 165 mapping func(p *Profile, s string) (mapped string, isBidi bool, err error) 166 167 // bidirule, if specified, checks whether s conforms to the Bidi Rule 168 // defined in RFC 5893. 169 bidirule func(s string) bool 170 } 171 172 // A Profile defines the configuration of an IDNA mapper. 173 type Profile struct { 174 options 175 } 176 177 func apply(o *options, opts []Option) { 178 for _, f := range opts { 179 f(o) 180 } 181 } 182 183 // New creates a new Profile. 184 // 185 // With no options, the returned Profile is the most permissive and equals the 186 // Punycode Profile. Options can be passed to further restrict the Profile. The 187 // MapForLookup and ValidateForRegistration options set a collection of options, 188 // for lookup and registration purposes respectively, which can be tailored by 189 // adding more fine-grained options, where later options override earlier 190 // options. 191 func New(o ...Option) *Profile { 192 p := &Profile{} 193 apply(&p.options, o) 194 return p 195 } 196 197 // ToASCII converts a domain or domain label to its ASCII form. For example, 198 // ToASCII("bücher.example.com") is "xn--bcher-kva.example.com", and 199 // ToASCII("golang") is "golang". 
If an error is encountered it will return 200 // an error and a (partially) processed result. 201 func (p *Profile) ToASCII(s string) (string, error) { 202 return p.process(s, true) 203 } 204 205 // ToUnicode converts a domain or domain label to its Unicode form. For example, 206 // ToUnicode("xn--bcher-kva.example.com") is "bücher.example.com", and 207 // ToUnicode("golang") is "golang". If an error is encountered it will return 208 // an error and a (partially) processed result. 209 func (p *Profile) ToUnicode(s string) (string, error) { 210 pp := *p 211 pp.transitional = false 212 return pp.process(s, false) 213 } 214 215 // String reports a string with a description of the profile for debugging 216 // purposes. The string format may change with different versions. 217 func (p *Profile) String() string { 218 s := "" 219 if p.transitional { 220 s = "Transitional" 221 } else { 222 s = "NonTransitional" 223 } 224 if p.useSTD3Rules { 225 s += ":UseSTD3Rules" 226 } 227 if p.validateLabels { 228 s += ":ValidateLabels" 229 } 230 if p.verifyDNSLength { 231 s += ":VerifyDNSLength" 232 } 233 return s 234 } 235 236 var ( 237 // Punycode is a Profile that does raw punycode processing with a minimum 238 // of validation. 239 Punycode *Profile = punycode 240 241 // Lookup is the recommended profile for looking up domain names, according 242 // to Section 5 of RFC 5891. The exact configuration of this profile may 243 // change over time. 244 Lookup *Profile = lookup 245 246 // Display is the recommended profile for displaying domain names. 247 // The configuration of this profile may change over time. 248 Display *Profile = display 249 250 // Registration is the recommended profile for checking whether a given 251 // IDN is valid for registration, according to Section 4 of RFC 5891. 
252 Registration *Profile = registration 253 254 punycode = &Profile{} 255 lookup = &Profile{options{ 256 transitional: true, 257 useSTD3Rules: true, 258 validateLabels: true, 259 trie: trie, 260 fromPuny: validateFromPunycode, 261 mapping: validateAndMap, 262 bidirule: bidirule.ValidString, 263 }} 264 display = &Profile{options{ 265 useSTD3Rules: true, 266 validateLabels: true, 267 trie: trie, 268 fromPuny: validateFromPunycode, 269 mapping: validateAndMap, 270 bidirule: bidirule.ValidString, 271 }} 272 registration = &Profile{options{ 273 useSTD3Rules: true, 274 validateLabels: true, 275 verifyDNSLength: true, 276 trie: trie, 277 fromPuny: validateFromPunycode, 278 mapping: validateRegistration, 279 bidirule: bidirule.ValidString, 280 }} 281 282 // TODO: profiles 283 // Register: recommended for approving domain names: don't do any mappings 284 // but rather reject on invalid input. Bundle or block deviation characters. 285 ) 286 287 type labelError struct{ label, code_ string } 288 289 func (e labelError) code() string { return e.code_ } 290 func (e labelError) Error() string { 291 return fmt.Sprintf("idna: invalid label %q", e.label) 292 } 293 294 type runeError rune 295 296 func (e runeError) code() string { return "P1" } 297 func (e runeError) Error() string { 298 return fmt.Sprintf("idna: disallowed rune %U", e) 299 } 300 301 // process implements the algorithm described in section 4 of UTS #46, 302 // see https://www.unicode.org/reports/tr46. 303 func (p *Profile) process(s string, toASCII bool) (string, error) { 304 var err error 305 var isBidi bool 306 if p.mapping != nil { 307 s, isBidi, err = p.mapping(p, s) 308 } 309 // Remove leading empty labels. 310 if p.removeLeadingDots { 311 for ; len(s) > 0 && s[0] == '.'; s = s[1:] { 312 } 313 } 314 // TODO: allow for a quick check of the tables data. 315 // It seems like we should only create this error on ToASCII, but the 316 // UTS 46 conformance tests suggests we should always check this. 
317 if err == nil && p.verifyDNSLength && s == "" { 318 err = &labelError{s, "A4"} 319 } 320 labels := labelIter{orig: s} 321 for ; !labels.done(); labels.next() { 322 label := labels.label() 323 if label == "" { 324 // Empty labels are not okay. The label iterator skips the last 325 // label if it is empty. 326 if err == nil && p.verifyDNSLength { 327 err = &labelError{s, "A4"} 328 } 329 continue 330 } 331 if strings.HasPrefix(label, acePrefix) { 332 u, err2 := decode(label[len(acePrefix):]) 333 if err2 != nil { 334 if err == nil { 335 err = err2 336 } 337 // Spec says keep the old label. 338 continue 339 } 340 isBidi = isBidi || bidirule.DirectionString(u) != bidi.LeftToRight 341 labels.set(u) 342 if err == nil && p.validateLabels { 343 err = p.fromPuny(p, u) 344 } 345 if err == nil { 346 // This should be called on NonTransitional, according to the 347 // spec, but that currently does not have any effect. Use the 348 // original profile to preserve options. 349 err = p.validateLabel(u) 350 } 351 } else if err == nil { 352 err = p.validateLabel(label) 353 } 354 } 355 if isBidi && p.bidirule != nil && err == nil { 356 for labels.reset(); !labels.done(); labels.next() { 357 if !p.bidirule(labels.label()) { 358 err = &labelError{s, "B"} 359 break 360 } 361 } 362 } 363 if toASCII { 364 for labels.reset(); !labels.done(); labels.next() { 365 label := labels.label() 366 if !ascii(label) { 367 a, err2 := encode(acePrefix, label) 368 if err == nil { 369 err = err2 370 } 371 label = a 372 labels.set(a) 373 } 374 n := len(label) 375 if p.verifyDNSLength && err == nil && (n == 0 || n > 63) { 376 err = &labelError{label, "A4"} 377 } 378 } 379 } 380 s = labels.result() 381 if toASCII && p.verifyDNSLength && err == nil { 382 // Compute the length of the domain name minus the root label and its dot. 383 n := len(s) 384 if n > 0 && s[n-1] == '.' 
{ 385 n-- 386 } 387 if len(s) < 1 || n > 253 { 388 err = &labelError{s, "A4"} 389 } 390 } 391 return s, err 392 } 393 394 func normalize(p *Profile, s string) (mapped string, isBidi bool, err error) { 395 // TODO: consider first doing a quick check to see if any of these checks 396 // need to be done. This will make it slower in the general case, but 397 // faster in the common case. 398 mapped = norm.NFC.String(s) 399 isBidi = bidirule.DirectionString(mapped) == bidi.RightToLeft 400 return mapped, isBidi, nil 401 } 402 403 func validateRegistration(p *Profile, s string) (idem string, bidi bool, err error) { 404 // TODO: filter need for normalization in loop below. 405 if !norm.NFC.IsNormalString(s) { 406 return s, false, &labelError{s, "V1"} 407 } 408 for i := 0; i < len(s); { 409 v, sz := trie.lookupString(s[i:]) 410 if sz == 0 { 411 return s, bidi, runeError(utf8.RuneError) 412 } 413 bidi = bidi || info(v).isBidi(s[i:]) 414 // Copy bytes not copied so far. 415 switch p.simplify(info(v).category()) { 416 // TODO: handle the NV8 defined in the Unicode idna data set to allow 417 // for strict conformance to IDNA2008. 418 case valid, deviation: 419 case disallowed, mapped, unknown, ignored: 420 r, _ := utf8.DecodeRuneInString(s[i:]) 421 return s, bidi, runeError(r) 422 } 423 i += sz 424 } 425 return s, bidi, nil 426 } 427 428 func (c info) isBidi(s string) bool { 429 if !c.isMapped() { 430 return c&attributesMask == rtl 431 } 432 // TODO: also store bidi info for mapped data. This is possible, but a bit 433 // cumbersome and not for the common case. 434 p, _ := bidi.LookupString(s) 435 switch p.Class() { 436 case bidi.R, bidi.AL, bidi.AN: 437 return true 438 } 439 return false 440 } 441 442 func validateAndMap(p *Profile, s string) (vm string, bidi bool, err error) { 443 var ( 444 b []byte 445 k int 446 ) 447 // combinedInfoBits contains the or-ed bits of all runes. We use this 448 // to derive the mayNeedNorm bit later. 
This may trigger normalization 449 // overeagerly, but it will not do so in the common case. The end result 450 // is another 10% saving on BenchmarkProfile for the common case. 451 var combinedInfoBits info 452 for i := 0; i < len(s); { 453 v, sz := trie.lookupString(s[i:]) 454 if sz == 0 { 455 b = append(b, s[k:i]...) 456 b = append(b, "\ufffd"...) 457 k = len(s) 458 if err == nil { 459 err = runeError(utf8.RuneError) 460 } 461 break 462 } 463 combinedInfoBits |= info(v) 464 bidi = bidi || info(v).isBidi(s[i:]) 465 start := i 466 i += sz 467 // Copy bytes not copied so far. 468 switch p.simplify(info(v).category()) { 469 case valid: 470 continue 471 case disallowed: 472 if err == nil { 473 r, _ := utf8.DecodeRuneInString(s[start:]) 474 err = runeError(r) 475 } 476 continue 477 case mapped, deviation: 478 b = append(b, s[k:start]...) 479 b = info(v).appendMapping(b, s[start:i]) 480 case ignored: 481 b = append(b, s[k:start]...) 482 // drop the rune 483 case unknown: 484 b = append(b, s[k:start]...) 485 b = append(b, "\ufffd"...) 486 } 487 k = i 488 } 489 if k == 0 { 490 // No changes so far. 491 if combinedInfoBits&mayNeedNorm != 0 { 492 s = norm.NFC.String(s) 493 } 494 } else { 495 b = append(b, s[k:]...) 496 if norm.NFC.QuickSpan(b) != len(b) { 497 b = norm.NFC.Bytes(b) 498 } 499 // TODO: the punycode converters require strings as input. 500 s = string(b) 501 } 502 return s, bidi, err 503 } 504 505 // A labelIter allows iterating over domain name labels. 
type labelIter struct {
	orig     string   // the domain name being iterated over
	slice    []string // labels of orig split on '.'; allocated by the first call to set
	curStart int      // byte offset in orig where the current label starts
	curEnd   int      // byte offset in orig just past the current label; updated by label
	i        int      // index of the current label
}

// reset rewinds the iterator to the first label.
func (l *labelIter) reset() {
	l.curStart, l.curEnd, l.i = 0, 0, 0
}

// done reports whether the iteration has moved past the last label.
func (l *labelIter) done() bool {
	return len(l.orig) <= l.curStart
}

// result returns the domain name, rebuilt from the replaced labels if set was
// ever called and the original string otherwise.
func (l *labelIter) result() string {
	if l.slice == nil {
		return l.orig
	}
	return strings.Join(l.slice, ".")
}

// label returns the current label. When no label has been replaced yet it
// also records where the label ends, which next relies on.
func (l *labelIter) label() string {
	if l.slice != nil {
		return l.slice[l.i]
	}
	if dot := strings.IndexByte(l.orig[l.curStart:], '.'); dot >= 0 {
		l.curEnd = l.curStart + dot
	} else {
		l.curEnd = len(l.orig)
	}
	return l.orig[l.curStart:l.curEnd]
}

// next sets the value to the next label. It skips the last label if it is empty.
func (l *labelIter) next() {
	l.i++
	if l.slice == nil {
		l.curStart = l.curEnd + 1
		if last := len(l.orig) - 1; l.curStart == last && l.orig[last] == '.' {
			l.curStart = len(l.orig)
		}
		return
	}
	// With a label slice in place, curStart is only moved to the end of orig
	// to signal that the iteration is finished.
	last := len(l.slice) - 1
	if l.i > last || (l.i == last && l.slice[last] == "") {
		l.curStart = len(l.orig)
	}
}

// set replaces the current label with s, splitting orig into labels on first
// use.
func (l *labelIter) set(s string) {
	if l.slice == nil {
		l.slice = strings.Split(l.orig, ".")
	}
	l.slice[l.i] = s
}

// acePrefix is the ASCII Compatible Encoding prefix.
566 const acePrefix = "xn--" 567 568 func (p *Profile) simplify(cat category) category { 569 switch cat { 570 case disallowedSTD3Mapped: 571 if p.useSTD3Rules { 572 cat = disallowed 573 } else { 574 cat = mapped 575 } 576 case disallowedSTD3Valid: 577 if p.useSTD3Rules { 578 cat = disallowed 579 } else { 580 cat = valid 581 } 582 case deviation: 583 if !p.transitional { 584 cat = valid 585 } 586 case validNV8, validXV8: 587 // TODO: handle V2008 588 cat = valid 589 } 590 return cat 591 } 592 593 func validateFromPunycode(p *Profile, s string) error { 594 if !norm.NFC.IsNormalString(s) { 595 return &labelError{s, "V1"} 596 } 597 // TODO: detect whether string may have to be normalized in the following 598 // loop. 599 for i := 0; i < len(s); { 600 v, sz := trie.lookupString(s[i:]) 601 if sz == 0 { 602 return runeError(utf8.RuneError) 603 } 604 if c := p.simplify(info(v).category()); c != valid && c != deviation { 605 return &labelError{s, "V6"} 606 } 607 i += sz 608 } 609 return nil 610 } 611 612 const ( 613 zwnj = "\u200c" 614 zwj = "\u200d" 615 ) 616 617 type joinState int8 618 619 const ( 620 stateStart joinState = iota 621 stateVirama 622 stateBefore 623 stateBeforeVirama 624 stateAfter 625 stateFAIL 626 ) 627 628 var joinStates = [][numJoinTypes]joinState{ 629 stateStart: { 630 joiningL: stateBefore, 631 joiningD: stateBefore, 632 joinZWNJ: stateFAIL, 633 joinZWJ: stateFAIL, 634 joinVirama: stateVirama, 635 }, 636 stateVirama: { 637 joiningL: stateBefore, 638 joiningD: stateBefore, 639 }, 640 stateBefore: { 641 joiningL: stateBefore, 642 joiningD: stateBefore, 643 joiningT: stateBefore, 644 joinZWNJ: stateAfter, 645 joinZWJ: stateFAIL, 646 joinVirama: stateBeforeVirama, 647 }, 648 stateBeforeVirama: { 649 joiningL: stateBefore, 650 joiningD: stateBefore, 651 joiningT: stateBefore, 652 }, 653 stateAfter: { 654 joiningL: stateFAIL, 655 joiningD: stateBefore, 656 joiningT: stateAfter, 657 joiningR: stateStart, 658 joinZWNJ: stateFAIL, 659 joinZWJ: stateFAIL, 660 
joinVirama: stateAfter, // no-op as we can't accept joiners here 661 }, 662 stateFAIL: { 663 0: stateFAIL, 664 joiningL: stateFAIL, 665 joiningD: stateFAIL, 666 joiningT: stateFAIL, 667 joiningR: stateFAIL, 668 joinZWNJ: stateFAIL, 669 joinZWJ: stateFAIL, 670 joinVirama: stateFAIL, 671 }, 672 } 673 674 // validateLabel validates the criteria from Section 4.1. Item 1, 4, and 6 are 675 // already implicitly satisfied by the overall implementation. 676 func (p *Profile) validateLabel(s string) (err error) { 677 if s == "" { 678 if p.verifyDNSLength { 679 return &labelError{s, "A4"} 680 } 681 return nil 682 } 683 if !p.validateLabels { 684 return nil 685 } 686 trie := p.trie // p.validateLabels is only set if trie is set. 687 if len(s) > 4 && s[2] == '-' && s[3] == '-' { 688 return &labelError{s, "V2"} 689 } 690 if s[0] == '-' || s[len(s)-1] == '-' { 691 return &labelError{s, "V3"} 692 } 693 // TODO: merge the use of this in the trie. 694 v, sz := trie.lookupString(s) 695 x := info(v) 696 if x.isModifier() { 697 return &labelError{s, "V5"} 698 } 699 // Quickly return in the absence of zero-width (non) joiners. 700 if strings.Index(s, zwj) == -1 && strings.Index(s, zwnj) == -1 { 701 return nil 702 } 703 st := stateStart 704 for i := 0; ; { 705 jt := x.joinType() 706 if s[i:i+sz] == zwj { 707 jt = joinZWJ 708 } else if s[i:i+sz] == zwnj { 709 jt = joinZWNJ 710 } 711 st = joinStates[st][jt] 712 if x.isViramaModifier() { 713 st = joinStates[st][joinVirama] 714 } 715 if i += sz; i == len(s) { 716 break 717 } 718 v, sz = trie.lookupString(s[i:]) 719 x = info(v) 720 } 721 if st == stateFAIL || st == stateAfter { 722 return &labelError{s, "C"} 723 } 724 return nil 725 } 726 727 func ascii(s string) bool { 728 for i := 0; i < len(s); i++ { 729 if s[i] >= utf8.RuneSelf { 730 return false 731 } 732 } 733 return true 734 }