// Source: github.com/binkynet/BinkyNet@v1.12.1-0.20240421190447-da4e34c20be0/proto_vendor/golang.org/x/net/idna/idna9.0.0.go

// Code generated by running "go generate" in golang.org/x/text. DO NOT EDIT.

// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// +build !go1.10

// Package idna implements IDNA2008 using the compatibility processing
// defined by UTS (Unicode Technical Standard) #46, which defines a standard to
// deal with the transition from IDNA2003.
//
// IDNA2008 (Internationalized Domain Names for Applications), is defined in RFC
// 5890, RFC 5891, RFC 5892, RFC 5893 and RFC 5894.
// UTS #46 is defined in https://www.unicode.org/reports/tr46.
// See https://unicode.org/cldr/utility/idna.jsp for a visualization of the
// differences between these two standards.
package idna // import "golang.org/x/net/idna"

import (
	"fmt"
	"strings"
	"unicode/utf8"

	"golang.org/x/text/secure/bidirule"
	"golang.org/x/text/unicode/norm"
)

// NOTE: Unlike common practice in Go APIs, the functions will return a
// sanitized domain name in case of errors. Browsers sometimes use a partially
// evaluated string as lookup.
// TODO: the current error handling is, in my opinion, the least opinionated.
// Other strategies are also viable, though:
// Option 1) Return an empty string in case of error, but allow the user to
//    specify explicitly which errors to ignore.
// Option 2) Return the partially evaluated string if it is itself a valid
//    string, otherwise return the empty string in case of error.
// Option 3) Option 1 and 2.
// Option 4) Always return an empty string for now and implement Option 1 as
//    needed, and document that the return string may not be empty in case of
//    error in the future.
// I think Option 1 is best, but it is quite opinionated.

43 44 // ToASCII is a wrapper for Punycode.ToASCII. 45 func ToASCII(s string) (string, error) { 46 return Punycode.process(s, true) 47 } 48 49 // ToUnicode is a wrapper for Punycode.ToUnicode. 50 func ToUnicode(s string) (string, error) { 51 return Punycode.process(s, false) 52 } 53 54 // An Option configures a Profile at creation time. 55 type Option func(*options) 56 57 // Transitional sets a Profile to use the Transitional mapping as defined in UTS 58 // #46. This will cause, for example, "ß" to be mapped to "ss". Using the 59 // transitional mapping provides a compromise between IDNA2003 and IDNA2008 60 // compatibility. It is used by most browsers when resolving domain names. This 61 // option is only meaningful if combined with MapForLookup. 62 func Transitional(transitional bool) Option { 63 return func(o *options) { o.transitional = true } 64 } 65 66 // VerifyDNSLength sets whether a Profile should fail if any of the IDN parts 67 // are longer than allowed by the RFC. 68 func VerifyDNSLength(verify bool) Option { 69 return func(o *options) { o.verifyDNSLength = verify } 70 } 71 72 // RemoveLeadingDots removes leading label separators. Leading runes that map to 73 // dots, such as U+3002 IDEOGRAPHIC FULL STOP, are removed as well. 74 // 75 // This is the behavior suggested by the UTS #46 and is adopted by some 76 // browsers. 77 func RemoveLeadingDots(remove bool) Option { 78 return func(o *options) { o.removeLeadingDots = remove } 79 } 80 81 // ValidateLabels sets whether to check the mandatory label validation criteria 82 // as defined in Section 5.4 of RFC 5891. This includes testing for correct use 83 // of hyphens ('-'), normalization, validity of runes, and the context rules. 84 func ValidateLabels(enable bool) Option { 85 return func(o *options) { 86 // Don't override existing mappings, but set one that at least checks 87 // normalization if it is not set. 
88 if o.mapping == nil && enable { 89 o.mapping = normalize 90 } 91 o.trie = trie 92 o.validateLabels = enable 93 o.fromPuny = validateFromPunycode 94 } 95 } 96 97 // StrictDomainName limits the set of permissable ASCII characters to those 98 // allowed in domain names as defined in RFC 1034 (A-Z, a-z, 0-9 and the 99 // hyphen). This is set by default for MapForLookup and ValidateForRegistration. 100 // 101 // This option is useful, for instance, for browsers that allow characters 102 // outside this range, for example a '_' (U+005F LOW LINE). See 103 // http://www.rfc-editor.org/std/std3.txt for more details This option 104 // corresponds to the UseSTD3ASCIIRules option in UTS #46. 105 func StrictDomainName(use bool) Option { 106 return func(o *options) { 107 o.trie = trie 108 o.useSTD3Rules = use 109 o.fromPuny = validateFromPunycode 110 } 111 } 112 113 // NOTE: the following options pull in tables. The tables should not be linked 114 // in as long as the options are not used. 115 116 // BidiRule enables the Bidi rule as defined in RFC 5893. Any application 117 // that relies on proper validation of labels should include this rule. 118 func BidiRule() Option { 119 return func(o *options) { o.bidirule = bidirule.ValidString } 120 } 121 122 // ValidateForRegistration sets validation options to verify that a given IDN is 123 // properly formatted for registration as defined by Section 4 of RFC 5891. 124 func ValidateForRegistration() Option { 125 return func(o *options) { 126 o.mapping = validateRegistration 127 StrictDomainName(true)(o) 128 ValidateLabels(true)(o) 129 VerifyDNSLength(true)(o) 130 BidiRule()(o) 131 } 132 } 133 134 // MapForLookup sets validation and mapping options such that a given IDN is 135 // transformed for domain name lookup according to the requirements set out in 136 // Section 5 of RFC 5891. The mappings follow the recommendations of RFC 5894, 137 // RFC 5895 and UTS 46. It does not add the Bidi Rule. 
Use the BidiRule option 138 // to add this check. 139 // 140 // The mappings include normalization and mapping case, width and other 141 // compatibility mappings. 142 func MapForLookup() Option { 143 return func(o *options) { 144 o.mapping = validateAndMap 145 StrictDomainName(true)(o) 146 ValidateLabels(true)(o) 147 RemoveLeadingDots(true)(o) 148 } 149 } 150 151 type options struct { 152 transitional bool 153 useSTD3Rules bool 154 validateLabels bool 155 verifyDNSLength bool 156 removeLeadingDots bool 157 158 trie *idnaTrie 159 160 // fromPuny calls validation rules when converting A-labels to U-labels. 161 fromPuny func(p *Profile, s string) error 162 163 // mapping implements a validation and mapping step as defined in RFC 5895 164 // or UTS 46, tailored to, for example, domain registration or lookup. 165 mapping func(p *Profile, s string) (string, error) 166 167 // bidirule, if specified, checks whether s conforms to the Bidi Rule 168 // defined in RFC 5893. 169 bidirule func(s string) bool 170 } 171 172 // A Profile defines the configuration of a IDNA mapper. 173 type Profile struct { 174 options 175 } 176 177 func apply(o *options, opts []Option) { 178 for _, f := range opts { 179 f(o) 180 } 181 } 182 183 // New creates a new Profile. 184 // 185 // With no options, the returned Profile is the most permissive and equals the 186 // Punycode Profile. Options can be passed to further restrict the Profile. The 187 // MapForLookup and ValidateForRegistration options set a collection of options, 188 // for lookup and registration purposes respectively, which can be tailored by 189 // adding more fine-grained options, where later options override earlier 190 // options. 191 func New(o ...Option) *Profile { 192 p := &Profile{} 193 apply(&p.options, o) 194 return p 195 } 196 197 // ToASCII converts a domain or domain label to its ASCII form. For example, 198 // ToASCII("bücher.example.com") is "xn--bcher-kva.example.com", and 199 // ToASCII("golang") is "golang". 
If an error is encountered it will return 200 // an error and a (partially) processed result. 201 func (p *Profile) ToASCII(s string) (string, error) { 202 return p.process(s, true) 203 } 204 205 // ToUnicode converts a domain or domain label to its Unicode form. For example, 206 // ToUnicode("xn--bcher-kva.example.com") is "bücher.example.com", and 207 // ToUnicode("golang") is "golang". If an error is encountered it will return 208 // an error and a (partially) processed result. 209 func (p *Profile) ToUnicode(s string) (string, error) { 210 pp := *p 211 pp.transitional = false 212 return pp.process(s, false) 213 } 214 215 // String reports a string with a description of the profile for debugging 216 // purposes. The string format may change with different versions. 217 func (p *Profile) String() string { 218 s := "" 219 if p.transitional { 220 s = "Transitional" 221 } else { 222 s = "NonTransitional" 223 } 224 if p.useSTD3Rules { 225 s += ":UseSTD3Rules" 226 } 227 if p.validateLabels { 228 s += ":ValidateLabels" 229 } 230 if p.verifyDNSLength { 231 s += ":VerifyDNSLength" 232 } 233 return s 234 } 235 236 var ( 237 // Punycode is a Profile that does raw punycode processing with a minimum 238 // of validation. 239 Punycode *Profile = punycode 240 241 // Lookup is the recommended profile for looking up domain names, according 242 // to Section 5 of RFC 5891. The exact configuration of this profile may 243 // change over time. 244 Lookup *Profile = lookup 245 246 // Display is the recommended profile for displaying domain names. 247 // The configuration of this profile may change over time. 248 Display *Profile = display 249 250 // Registration is the recommended profile for checking whether a given 251 // IDN is valid for registration, according to Section 4 of RFC 5891. 
252 Registration *Profile = registration 253 254 punycode = &Profile{} 255 lookup = &Profile{options{ 256 transitional: true, 257 useSTD3Rules: true, 258 validateLabels: true, 259 removeLeadingDots: true, 260 trie: trie, 261 fromPuny: validateFromPunycode, 262 mapping: validateAndMap, 263 bidirule: bidirule.ValidString, 264 }} 265 display = &Profile{options{ 266 useSTD3Rules: true, 267 validateLabels: true, 268 removeLeadingDots: true, 269 trie: trie, 270 fromPuny: validateFromPunycode, 271 mapping: validateAndMap, 272 bidirule: bidirule.ValidString, 273 }} 274 registration = &Profile{options{ 275 useSTD3Rules: true, 276 validateLabels: true, 277 verifyDNSLength: true, 278 trie: trie, 279 fromPuny: validateFromPunycode, 280 mapping: validateRegistration, 281 bidirule: bidirule.ValidString, 282 }} 283 284 // TODO: profiles 285 // Register: recommended for approving domain names: don't do any mappings 286 // but rather reject on invalid input. Bundle or block deviation characters. 287 ) 288 289 type labelError struct{ label, code_ string } 290 291 func (e labelError) code() string { return e.code_ } 292 func (e labelError) Error() string { 293 return fmt.Sprintf("idna: invalid label %q", e.label) 294 } 295 296 type runeError rune 297 298 func (e runeError) code() string { return "P1" } 299 func (e runeError) Error() string { 300 return fmt.Sprintf("idna: disallowed rune %U", e) 301 } 302 303 // process implements the algorithm described in section 4 of UTS #46, 304 // see https://www.unicode.org/reports/tr46. 305 func (p *Profile) process(s string, toASCII bool) (string, error) { 306 var err error 307 if p.mapping != nil { 308 s, err = p.mapping(p, s) 309 } 310 // Remove leading empty labels. 311 if p.removeLeadingDots { 312 for ; len(s) > 0 && s[0] == '.'; s = s[1:] { 313 } 314 } 315 // It seems like we should only create this error on ToASCII, but the 316 // UTS 46 conformance tests suggests we should always check this. 
317 if err == nil && p.verifyDNSLength && s == "" { 318 err = &labelError{s, "A4"} 319 } 320 labels := labelIter{orig: s} 321 for ; !labels.done(); labels.next() { 322 label := labels.label() 323 if label == "" { 324 // Empty labels are not okay. The label iterator skips the last 325 // label if it is empty. 326 if err == nil && p.verifyDNSLength { 327 err = &labelError{s, "A4"} 328 } 329 continue 330 } 331 if strings.HasPrefix(label, acePrefix) { 332 u, err2 := decode(label[len(acePrefix):]) 333 if err2 != nil { 334 if err == nil { 335 err = err2 336 } 337 // Spec says keep the old label. 338 continue 339 } 340 labels.set(u) 341 if err == nil && p.validateLabels { 342 err = p.fromPuny(p, u) 343 } 344 if err == nil { 345 // This should be called on NonTransitional, according to the 346 // spec, but that currently does not have any effect. Use the 347 // original profile to preserve options. 348 err = p.validateLabel(u) 349 } 350 } else if err == nil { 351 err = p.validateLabel(label) 352 } 353 } 354 if toASCII { 355 for labels.reset(); !labels.done(); labels.next() { 356 label := labels.label() 357 if !ascii(label) { 358 a, err2 := encode(acePrefix, label) 359 if err == nil { 360 err = err2 361 } 362 label = a 363 labels.set(a) 364 } 365 n := len(label) 366 if p.verifyDNSLength && err == nil && (n == 0 || n > 63) { 367 err = &labelError{label, "A4"} 368 } 369 } 370 } 371 s = labels.result() 372 if toASCII && p.verifyDNSLength && err == nil { 373 // Compute the length of the domain name minus the root label and its dot. 374 n := len(s) 375 if n > 0 && s[n-1] == '.' 
{ 376 n-- 377 } 378 if len(s) < 1 || n > 253 { 379 err = &labelError{s, "A4"} 380 } 381 } 382 return s, err 383 } 384 385 func normalize(p *Profile, s string) (string, error) { 386 return norm.NFC.String(s), nil 387 } 388 389 func validateRegistration(p *Profile, s string) (string, error) { 390 if !norm.NFC.IsNormalString(s) { 391 return s, &labelError{s, "V1"} 392 } 393 for i := 0; i < len(s); { 394 v, sz := trie.lookupString(s[i:]) 395 // Copy bytes not copied so far. 396 switch p.simplify(info(v).category()) { 397 // TODO: handle the NV8 defined in the Unicode idna data set to allow 398 // for strict conformance to IDNA2008. 399 case valid, deviation: 400 case disallowed, mapped, unknown, ignored: 401 r, _ := utf8.DecodeRuneInString(s[i:]) 402 return s, runeError(r) 403 } 404 i += sz 405 } 406 return s, nil 407 } 408 409 func validateAndMap(p *Profile, s string) (string, error) { 410 var ( 411 err error 412 b []byte 413 k int 414 ) 415 for i := 0; i < len(s); { 416 v, sz := trie.lookupString(s[i:]) 417 start := i 418 i += sz 419 // Copy bytes not copied so far. 420 switch p.simplify(info(v).category()) { 421 case valid: 422 continue 423 case disallowed: 424 if err == nil { 425 r, _ := utf8.DecodeRuneInString(s[start:]) 426 err = runeError(r) 427 } 428 continue 429 case mapped, deviation: 430 b = append(b, s[k:start]...) 431 b = info(v).appendMapping(b, s[start:i]) 432 case ignored: 433 b = append(b, s[k:start]...) 434 // drop the rune 435 case unknown: 436 b = append(b, s[k:start]...) 437 b = append(b, "\ufffd"...) 438 } 439 k = i 440 } 441 if k == 0 { 442 // No changes so far. 443 s = norm.NFC.String(s) 444 } else { 445 b = append(b, s[k:]...) 446 if norm.NFC.QuickSpan(b) != len(b) { 447 b = norm.NFC.Bytes(b) 448 } 449 // TODO: the punycode converters require strings as input. 450 s = string(b) 451 } 452 return s, err 453 } 454 455 // A labelIter allows iterating over domain name labels. 
// The iterator has two modes. Until set is called it scans orig lazily using
// curStart and curEnd. After the first call to set it serves labels from
// slice, indexed by i.
type labelIter struct {
	orig     string   // domain name being iterated
	slice    []string // orig split on '.'; allocated by the first call to set
	curStart int      // offset in orig where the current label starts
	curEnd   int      // offset in orig just past the current label; updated by label
	i        int      // index of the current label
}

// reset rewinds the iterator to the first label. Labels replaced through set
// are kept.
func (l *labelIter) reset() {
	l.curStart = 0
	l.curEnd = 0
	l.i = 0
}

// done reports whether the iteration has moved past the last label.
func (l *labelIter) done() bool {
	return l.curStart >= len(l.orig)
}

// result returns the domain name with every replacement made through set
// applied, or orig unchanged if set was never called.
func (l *labelIter) result() string {
	if l.slice != nil {
		return strings.Join(l.slice, ".")
	}
	return l.orig
}

// label returns the current label. In scanning mode it also records where the
// label ends in curEnd, which next relies on, so label must be called before
// next.
func (l *labelIter) label() string {
	if l.slice != nil {
		return l.slice[l.i]
	}
	p := strings.IndexByte(l.orig[l.curStart:], '.')
	l.curEnd = l.curStart + p
	if p == -1 {
		// No further separator: the label runs to the end of orig.
		l.curEnd = len(l.orig)
	}
	return l.orig[l.curStart:l.curEnd]
}

// next sets the value to the next label. It skips the last label if it is empty.
func (l *labelIter) next() {
	l.i++
	if l.slice != nil {
		// In slice mode curStart only serves as the "done" marker.
		if l.i >= len(l.slice) || l.i == len(l.slice)-1 && l.slice[l.i] == "" {
			l.curStart = len(l.orig)
		}
	} else {
		l.curStart = l.curEnd + 1
		if l.curStart == len(l.orig)-1 && l.orig[l.curStart] == '.' {
			l.curStart = len(l.orig)
		}
	}
}

// set replaces the current label with s. The first call splits orig into
// slice, switching the iterator to slice mode.
func (l *labelIter) set(s string) {
	if l.slice == nil {
		l.slice = strings.Split(l.orig, ".")
	}
	l.slice[l.i] = s
}

// acePrefix is the ASCII Compatible Encoding prefix.
516 const acePrefix = "xn--" 517 518 func (p *Profile) simplify(cat category) category { 519 switch cat { 520 case disallowedSTD3Mapped: 521 if p.useSTD3Rules { 522 cat = disallowed 523 } else { 524 cat = mapped 525 } 526 case disallowedSTD3Valid: 527 if p.useSTD3Rules { 528 cat = disallowed 529 } else { 530 cat = valid 531 } 532 case deviation: 533 if !p.transitional { 534 cat = valid 535 } 536 case validNV8, validXV8: 537 // TODO: handle V2008 538 cat = valid 539 } 540 return cat 541 } 542 543 func validateFromPunycode(p *Profile, s string) error { 544 if !norm.NFC.IsNormalString(s) { 545 return &labelError{s, "V1"} 546 } 547 for i := 0; i < len(s); { 548 v, sz := trie.lookupString(s[i:]) 549 if c := p.simplify(info(v).category()); c != valid && c != deviation { 550 return &labelError{s, "V6"} 551 } 552 i += sz 553 } 554 return nil 555 } 556 557 const ( 558 zwnj = "\u200c" 559 zwj = "\u200d" 560 ) 561 562 type joinState int8 563 564 const ( 565 stateStart joinState = iota 566 stateVirama 567 stateBefore 568 stateBeforeVirama 569 stateAfter 570 stateFAIL 571 ) 572 573 var joinStates = [][numJoinTypes]joinState{ 574 stateStart: { 575 joiningL: stateBefore, 576 joiningD: stateBefore, 577 joinZWNJ: stateFAIL, 578 joinZWJ: stateFAIL, 579 joinVirama: stateVirama, 580 }, 581 stateVirama: { 582 joiningL: stateBefore, 583 joiningD: stateBefore, 584 }, 585 stateBefore: { 586 joiningL: stateBefore, 587 joiningD: stateBefore, 588 joiningT: stateBefore, 589 joinZWNJ: stateAfter, 590 joinZWJ: stateFAIL, 591 joinVirama: stateBeforeVirama, 592 }, 593 stateBeforeVirama: { 594 joiningL: stateBefore, 595 joiningD: stateBefore, 596 joiningT: stateBefore, 597 }, 598 stateAfter: { 599 joiningL: stateFAIL, 600 joiningD: stateBefore, 601 joiningT: stateAfter, 602 joiningR: stateStart, 603 joinZWNJ: stateFAIL, 604 joinZWJ: stateFAIL, 605 joinVirama: stateAfter, // no-op as we can't accept joiners here 606 }, 607 stateFAIL: { 608 0: stateFAIL, 609 joiningL: stateFAIL, 610 joiningD: 
stateFAIL, 611 joiningT: stateFAIL, 612 joiningR: stateFAIL, 613 joinZWNJ: stateFAIL, 614 joinZWJ: stateFAIL, 615 joinVirama: stateFAIL, 616 }, 617 } 618 619 // validateLabel validates the criteria from Section 4.1. Item 1, 4, and 6 are 620 // already implicitly satisfied by the overall implementation. 621 func (p *Profile) validateLabel(s string) error { 622 if s == "" { 623 if p.verifyDNSLength { 624 return &labelError{s, "A4"} 625 } 626 return nil 627 } 628 if p.bidirule != nil && !p.bidirule(s) { 629 return &labelError{s, "B"} 630 } 631 if !p.validateLabels { 632 return nil 633 } 634 trie := p.trie // p.validateLabels is only set if trie is set. 635 if len(s) > 4 && s[2] == '-' && s[3] == '-' { 636 return &labelError{s, "V2"} 637 } 638 if s[0] == '-' || s[len(s)-1] == '-' { 639 return &labelError{s, "V3"} 640 } 641 // TODO: merge the use of this in the trie. 642 v, sz := trie.lookupString(s) 643 x := info(v) 644 if x.isModifier() { 645 return &labelError{s, "V5"} 646 } 647 // Quickly return in the absence of zero-width (non) joiners. 648 if strings.Index(s, zwj) == -1 && strings.Index(s, zwnj) == -1 { 649 return nil 650 } 651 st := stateStart 652 for i := 0; ; { 653 jt := x.joinType() 654 if s[i:i+sz] == zwj { 655 jt = joinZWJ 656 } else if s[i:i+sz] == zwnj { 657 jt = joinZWNJ 658 } 659 st = joinStates[st][jt] 660 if x.isViramaModifier() { 661 st = joinStates[st][joinVirama] 662 } 663 if i += sz; i == len(s) { 664 break 665 } 666 v, sz = trie.lookupString(s[i:]) 667 x = info(v) 668 } 669 if st == stateFAIL || st == stateAfter { 670 return &labelError{s, "C"} 671 } 672 return nil 673 } 674 675 func ascii(s string) bool { 676 for i := 0; i < len(s); i++ { 677 if s[i] >= utf8.RuneSelf { 678 return false 679 } 680 } 681 return true 682 }