golang.org/toolchain@v0.0.1-go1.9rc2.windows-amd64/src/vendor/golang_org/x/net/idna/idna.go (about)

     1  // Code generated by running "go run gen.go -core" in golang.org/x/text. DO NOT EDIT.
     2  
     3  // Copyright 2016 The Go Authors. All rights reserved.
     4  // Use of this source code is governed by a BSD-style
     5  // license that can be found in the LICENSE file.
     6  
     7  // Package idna implements IDNA2008 using the compatibility processing
     8  // defined by UTS (Unicode Technical Standard) #46, which defines a standard to
     9  // deal with the transition from IDNA2003.
    10  //
    11  // IDNA2008 (Internationalized Domain Names for Applications), is defined in RFC
    12  // 5890, RFC 5891, RFC 5892, RFC 5893 and RFC 5894.
    13  // UTS #46 is defined in http://www.unicode.org/reports/tr46.
    14  // See http://unicode.org/cldr/utility/idna.jsp for a visualization of the
    15  // differences between these two standards.
    16  package idna // import "golang_org/x/text/internal/export/idna"
    17  
    18  import (
    19  	"fmt"
    20  	"strings"
    21  	"unicode/utf8"
    22  
    23  	"golang_org/x/text/secure/bidirule"
    24  	"golang_org/x/text/unicode/norm"
    25  )
    26  
    27  // NOTE: Unlike common practice in Go APIs, the functions will return a
    28  // sanitized domain name in case of errors. Browsers sometimes use a partially
    29  // evaluated string as lookup.
    30  // TODO: the current error handling is, in my opinion, the least opinionated.
    31  // Other strategies are also viable, though:
    32  // Option 1) Return an empty string in case of error, but allow the user to
    33  //    specify explicitly which errors to ignore.
    34  // Option 2) Return the partially evaluated string if it is itself a valid
    35  //    string, otherwise return the empty string in case of error.
    36  // Option 3) Option 1 and 2.
    37  // Option 4) Always return an empty string for now and implement Option 1 as
    38  //    needed, and document that the return string may not be empty in case of
    39  //    error in the future.
    40  // I think Option 1 is best, but it is quite opinionated.
    41  
    42  // ToASCII is a wrapper for Punycode.ToASCII.
    43  func ToASCII(s string) (string, error) {
    44  	return Punycode.process(s, true)
    45  }
    46  
    47  // ToUnicode is a wrapper for Punycode.ToUnicode.
    48  func ToUnicode(s string) (string, error) {
    49  	return Punycode.process(s, false)
    50  }
    51  
// An Option configures a Profile at creation time. Each Option is a function
// that mutates the options it is handed; New applies them in the order given.
type Option func(*options)
    54  
    55  // Transitional sets a Profile to use the Transitional mapping as defined in UTS
    56  // #46. This will cause, for example, "ß" to be mapped to "ss". Using the
    57  // transitional mapping provides a compromise between IDNA2003 and IDNA2008
    58  // compatibility. It is used by most browsers when resolving domain names. This
    59  // option is only meaningful if combined with MapForLookup.
    60  func Transitional(transitional bool) Option {
    61  	return func(o *options) { o.transitional = true }
    62  }
    63  
    64  // VerifyDNSLength sets whether a Profile should fail if any of the IDN parts
    65  // are longer than allowed by the RFC.
    66  func VerifyDNSLength(verify bool) Option {
    67  	return func(o *options) { o.verifyDNSLength = verify }
    68  }
    69  
    70  // ValidateLabels sets whether to check the mandatory label validation criteria
    71  // as defined in Section 5.4 of RFC 5891. This includes testing for correct use
    72  // of hyphens ('-'), normalization, validity of runes, and the context rules.
    73  func ValidateLabels(enable bool) Option {
    74  	return func(o *options) {
    75  		// Don't override existing mappings, but set one that at least checks
    76  		// normalization if it is not set.
    77  		if o.mapping == nil && enable {
    78  			o.mapping = normalize
    79  		}
    80  		o.trie = trie
    81  		o.validateLabels = enable
    82  		o.fromPuny = validateFromPunycode
    83  	}
    84  }
    85  
    86  // StrictDomainName limits the set of permissable ASCII characters to those
    87  // allowed in domain names as defined in RFC 1034 (A-Z, a-z, 0-9 and the
    88  // hyphen). This is set by default for MapForLookup and ValidateForRegistration.
    89  //
    90  // This option is useful, for instance, for browsers that allow characters
    91  // outside this range, for example a '_' (U+005F LOW LINE). See
    92  // http://www.rfc-editor.org/std/std3.txt for more details This option
    93  // corresponds to the UseSTD3ASCIIRules option in UTS #46.
    94  func StrictDomainName(use bool) Option {
    95  	return func(o *options) {
    96  		o.trie = trie
    97  		o.useSTD3Rules = use
    98  		o.fromPuny = validateFromPunycode
    99  	}
   100  }
   101  
   102  // NOTE: the following options pull in tables. The tables should not be linked
   103  // in as long as the options are not used.
   104  
   105  // BidiRule enables the Bidi rule as defined in RFC 5893. Any application
   106  // that relies on proper validation of labels should include this rule.
   107  func BidiRule() Option {
   108  	return func(o *options) { o.bidirule = bidirule.ValidString }
   109  }
   110  
   111  // ValidateForRegistration sets validation options to verify that a given IDN is
   112  // properly formatted for registration as defined by Section 4 of RFC 5891.
   113  func ValidateForRegistration() Option {
   114  	return func(o *options) {
   115  		o.mapping = validateRegistration
   116  		StrictDomainName(true)(o)
   117  		ValidateLabels(true)(o)
   118  		VerifyDNSLength(true)(o)
   119  		BidiRule()(o)
   120  	}
   121  }
   122  
   123  // MapForLookup sets validation and mapping options such that a given IDN is
   124  // transformed for domain name lookup according to the requirements set out in
   125  // Section 5 of RFC 5891. The mappings follow the recommendations of RFC 5894,
   126  // RFC 5895 and UTS 46. It does not add the Bidi Rule. Use the BidiRule option
   127  // to add this check.
   128  //
   129  // The mappings include normalization and mapping case, width and other
   130  // compatibility mappings.
   131  func MapForLookup() Option {
   132  	return func(o *options) {
   133  		o.mapping = validateAndMap
   134  		StrictDomainName(true)(o)
   135  		ValidateLabels(true)(o)
   136  	}
   137  }
   138  
// options holds the configuration embedded in every Profile. With the zero
// value no mapping function, trie, or bidi check is installed and all boolean
// switches are off.
type options struct {
	// Boolean switches, each set by the option of the corresponding name
	// (Transitional, StrictDomainName, ValidateLabels, VerifyDNSLength).
	transitional    bool
	useSTD3Rules    bool
	validateLabels  bool
	verifyDNSLength bool

	// trie is the lookup table used by validateLabel; it is installed by
	// ValidateLabels and StrictDomainName.
	trie *idnaTrie

	// fromPuny calls validation rules when converting A-labels to U-labels.
	fromPuny func(p *Profile, s string) error

	// mapping implements a validation and mapping step as defined in RFC 5895
	// or UTS 46, tailored to, for example, domain registration or lookup.
	mapping func(p *Profile, s string) (string, error)

	// bidirule, if specified, checks whether s conforms to the Bidi Rule
	// defined in RFC 5893.
	bidirule func(s string) bool
}
   158  
// A Profile defines the configuration of an IDNA mapper. It embeds options,
// so every configured setting is reachable directly as a field of the Profile.
type Profile struct {
	options
}
   163  
   164  func apply(o *options, opts []Option) {
   165  	for _, f := range opts {
   166  		f(o)
   167  	}
   168  }
   169  
   170  // New creates a new Profile.
   171  //
   172  // With no options, the returned Profile is the most permissive and equals the
   173  // Punycode Profile. Options can be passed to further restrict the Profile. The
   174  // MapForLookup and ValidateForRegistration options set a collection of options,
   175  // for lookup and registration purposes respectively, which can be tailored by
   176  // adding more fine-grained options, where later options override earlier
   177  // options.
   178  func New(o ...Option) *Profile {
   179  	p := &Profile{}
   180  	apply(&p.options, o)
   181  	return p
   182  }
   183  
   184  // ToASCII converts a domain or domain label to its ASCII form. For example,
   185  // ToASCII("bücher.example.com") is "xn--bcher-kva.example.com", and
   186  // ToASCII("golang") is "golang". If an error is encountered it will return
   187  // an error and a (partially) processed result.
   188  func (p *Profile) ToASCII(s string) (string, error) {
   189  	return p.process(s, true)
   190  }
   191  
   192  // ToUnicode converts a domain or domain label to its Unicode form. For example,
   193  // ToUnicode("xn--bcher-kva.example.com") is "bücher.example.com", and
   194  // ToUnicode("golang") is "golang". If an error is encountered it will return
   195  // an error and a (partially) processed result.
   196  func (p *Profile) ToUnicode(s string) (string, error) {
   197  	pp := *p
   198  	pp.transitional = false
   199  	return pp.process(s, false)
   200  }
   201  
   202  // String reports a string with a description of the profile for debugging
   203  // purposes. The string format may change with different versions.
   204  func (p *Profile) String() string {
   205  	s := ""
   206  	if p.transitional {
   207  		s = "Transitional"
   208  	} else {
   209  		s = "NonTransitional"
   210  	}
   211  	if p.useSTD3Rules {
   212  		s += ":UseSTD3Rules"
   213  	}
   214  	if p.validateLabels {
   215  		s += ":ValidateLabels"
   216  	}
   217  	if p.verifyDNSLength {
   218  		s += ":VerifyDNSLength"
   219  	}
   220  	return s
   221  }
   222  
// Predefined profiles. Each exported variable aliases an unexported profile
// value declared further down in this block.
var (
	// Punycode is a Profile that does raw punycode processing with a minimum
	// of validation.
	Punycode *Profile = punycode

	// Lookup is the recommended profile for looking up domain names, according
	// to Section 5 of RFC 5891. The exact configuration of this profile may
	// change over time.
	Lookup *Profile = lookup

	// Display is the recommended profile for displaying domain names.
	// The configuration of this profile may change over time.
	Display *Profile = display

	// Registration is the recommended profile for checking whether a given
	// IDN is valid for registration, according to Section 4 of RFC 5891.
	Registration *Profile = registration

	// punycode leaves every option at its zero value.
	punycode = &Profile{}
	// lookup maps and validates with the transitional flag set.
	lookup   = &Profile{options{
		transitional:   true,
		useSTD3Rules:   true,
		validateLabels: true,
		trie:           trie,
		fromPuny:       validateFromPunycode,
		mapping:        validateAndMap,
		bidirule:       bidirule.ValidString,
	}}
	// display is configured like lookup, but without the transitional flag.
	display = &Profile{options{
		useSTD3Rules:   true,
		validateLabels: true,
		trie:           trie,
		fromPuny:       validateFromPunycode,
		mapping:        validateAndMap,
		bidirule:       bidirule.ValidString,
	}}
	// registration validates without mapping (validateRegistration) and also
	// enables the DNS length checks.
	registration = &Profile{options{
		useSTD3Rules:    true,
		validateLabels:  true,
		verifyDNSLength: true,
		trie:            trie,
		fromPuny:        validateFromPunycode,
		mapping:         validateRegistration,
		bidirule:        bidirule.ValidString,
	}}

	// TODO: profiles
	// Register: recommended for approving domain names: don't do any mappings
	// but rather reject on invalid input. Bundle or block deviation characters.
)
   273  
   274  type labelError struct{ label, code_ string }
   275  
   276  func (e labelError) code() string { return e.code_ }
   277  func (e labelError) Error() string {
   278  	return fmt.Sprintf("idna: invalid label %q", e.label)
   279  }
   280  
   281  type runeError rune
   282  
   283  func (e runeError) code() string { return "P1" }
   284  func (e runeError) Error() string {
   285  	return fmt.Sprintf("idna: disallowed rune %U", e)
   286  }
   287  
// process implements the algorithm described in section 4 of UTS #46,
// see http://www.unicode.org/reports/tr46.
//
// It runs the profile's mapping step (if one is set), strips leading dots,
// validates every label (decoding labels that carry the ACE prefix first)
// and, when toASCII is true, encodes every non-ASCII label. Only the first
// error encountered is kept; processing continues after an error, so the
// returned string is always the (possibly partially) processed input.
func (p *Profile) process(s string, toASCII bool) (string, error) {
	var err error
	if p.mapping != nil {
		s, err = p.mapping(p, s)
	}
	// Remove leading empty labels.
	for ; len(s) > 0 && s[0] == '.'; s = s[1:] {
	}
	// It seems like we should only create this error on ToASCII, but the
	// UTS 46 conformance tests suggests we should always check this.
	if err == nil && p.verifyDNSLength && s == "" {
		err = &labelError{s, "A4"}
	}
	// First pass: decode ACE labels and validate each label.
	labels := labelIter{orig: s}
	for ; !labels.done(); labels.next() {
		label := labels.label()
		if label == "" {
			// Empty labels are not okay. The label iterator skips the last
			// label if it is empty.
			if err == nil && p.verifyDNSLength {
				err = &labelError{s, "A4"}
			}
			continue
		}
		if strings.HasPrefix(label, acePrefix) {
			u, err2 := decode(label[len(acePrefix):])
			if err2 != nil {
				if err == nil {
					err = err2
				}
				// Spec says keep the old label.
				continue
			}
			// The decoded form replaces the label even if validation below
			// fails or an earlier error is already recorded.
			labels.set(u)
			if err == nil && p.validateLabels {
				err = p.fromPuny(p, u)
			}
			if err == nil {
				// This should be called on NonTransitional, according to the
				// spec, but that currently does not have any effect. Use the
				// original profile to preserve options.
				err = p.validateLabel(u)
			}
		} else if err == nil {
			err = p.validateLabel(label)
		}
	}
	// Second pass (ToASCII only): encode non-ASCII labels and check the
	// per-label length limit.
	if toASCII {
		for labels.reset(); !labels.done(); labels.next() {
			label := labels.label()
			if !ascii(label) {
				a, err2 := encode(acePrefix, label)
				if err == nil {
					err = err2
				}
				// The encoder's output is stored even when it reported an
				// error.
				label = a
				labels.set(a)
			}
			n := len(label)
			if p.verifyDNSLength && err == nil && (n == 0 || n > 63) {
				err = &labelError{label, "A4"}
			}
		}
	}
	s = labels.result()
	if toASCII && p.verifyDNSLength && err == nil {
		// Compute the length of the domain name minus the root label and its dot.
		n := len(s)
		if n > 0 && s[n-1] == '.' {
			n--
		}
		if len(s) < 1 || n > 253 {
			err = &labelError{s, "A4"}
		}
	}
	return s, err
}
   367  
   368  func normalize(p *Profile, s string) (string, error) {
   369  	return norm.NFC.String(s), nil
   370  }
   371  
   372  func validateRegistration(p *Profile, s string) (string, error) {
   373  	if !norm.NFC.IsNormalString(s) {
   374  		return s, &labelError{s, "V1"}
   375  	}
   376  	var err error
   377  	for i := 0; i < len(s); {
   378  		v, sz := trie.lookupString(s[i:])
   379  		i += sz
   380  		// Copy bytes not copied so far.
   381  		switch p.simplify(info(v).category()) {
   382  		// TODO: handle the NV8 defined in the Unicode idna data set to allow
   383  		// for strict conformance to IDNA2008.
   384  		case valid, deviation:
   385  		case disallowed, mapped, unknown, ignored:
   386  			if err == nil {
   387  				r, _ := utf8.DecodeRuneInString(s[i:])
   388  				err = runeError(r)
   389  			}
   390  		}
   391  	}
   392  	return s, err
   393  }
   394  
   395  func validateAndMap(p *Profile, s string) (string, error) {
   396  	var (
   397  		err error
   398  		b   []byte
   399  		k   int
   400  	)
   401  	for i := 0; i < len(s); {
   402  		v, sz := trie.lookupString(s[i:])
   403  		start := i
   404  		i += sz
   405  		// Copy bytes not copied so far.
   406  		switch p.simplify(info(v).category()) {
   407  		case valid:
   408  			continue
   409  		case disallowed:
   410  			if err == nil {
   411  				r, _ := utf8.DecodeRuneInString(s[i:])
   412  				err = runeError(r)
   413  			}
   414  			continue
   415  		case mapped, deviation:
   416  			b = append(b, s[k:start]...)
   417  			b = info(v).appendMapping(b, s[start:i])
   418  		case ignored:
   419  			b = append(b, s[k:start]...)
   420  			// drop the rune
   421  		case unknown:
   422  			b = append(b, s[k:start]...)
   423  			b = append(b, "\ufffd"...)
   424  		}
   425  		k = i
   426  	}
   427  	if k == 0 {
   428  		// No changes so far.
   429  		s = norm.NFC.String(s)
   430  	} else {
   431  		b = append(b, s[k:]...)
   432  		if norm.NFC.QuickSpan(b) != len(b) {
   433  			b = norm.NFC.Bytes(b)
   434  		}
   435  		// TODO: the punycode converters require strings as input.
   436  		s = string(b)
   437  	}
   438  	return s, err
   439  }
   440  
   441  // A labelIter allows iterating over domain name labels.
   442  type labelIter struct {
   443  	orig     string
   444  	slice    []string
   445  	curStart int
   446  	curEnd   int
   447  	i        int
   448  }
   449  
   450  func (l *labelIter) reset() {
   451  	l.curStart = 0
   452  	l.curEnd = 0
   453  	l.i = 0
   454  }
   455  
   456  func (l *labelIter) done() bool {
   457  	return l.curStart >= len(l.orig)
   458  }
   459  
   460  func (l *labelIter) result() string {
   461  	if l.slice != nil {
   462  		return strings.Join(l.slice, ".")
   463  	}
   464  	return l.orig
   465  }
   466  
   467  func (l *labelIter) label() string {
   468  	if l.slice != nil {
   469  		return l.slice[l.i]
   470  	}
   471  	p := strings.IndexByte(l.orig[l.curStart:], '.')
   472  	l.curEnd = l.curStart + p
   473  	if p == -1 {
   474  		l.curEnd = len(l.orig)
   475  	}
   476  	return l.orig[l.curStart:l.curEnd]
   477  }
   478  
   479  // next sets the value to the next label. It skips the last label if it is empty.
   480  func (l *labelIter) next() {
   481  	l.i++
   482  	if l.slice != nil {
   483  		if l.i >= len(l.slice) || l.i == len(l.slice)-1 && l.slice[l.i] == "" {
   484  			l.curStart = len(l.orig)
   485  		}
   486  	} else {
   487  		l.curStart = l.curEnd + 1
   488  		if l.curStart == len(l.orig)-1 && l.orig[l.curStart] == '.' {
   489  			l.curStart = len(l.orig)
   490  		}
   491  	}
   492  }
   493  
   494  func (l *labelIter) set(s string) {
   495  	if l.slice == nil {
   496  		l.slice = strings.Split(l.orig, ".")
   497  	}
   498  	l.slice[l.i] = s
   499  }
   500  
// acePrefix is the ASCII Compatible Encoding prefix. Labels starting with it
// are decoded by process, and it is passed to encode when a label is
// converted to ASCII.
const acePrefix = "xn--"
   503  
   504  func (p *Profile) simplify(cat category) category {
   505  	switch cat {
   506  	case disallowedSTD3Mapped:
   507  		if p.useSTD3Rules {
   508  			cat = disallowed
   509  		} else {
   510  			cat = mapped
   511  		}
   512  	case disallowedSTD3Valid:
   513  		if p.useSTD3Rules {
   514  			cat = disallowed
   515  		} else {
   516  			cat = valid
   517  		}
   518  	case deviation:
   519  		if !p.transitional {
   520  			cat = valid
   521  		}
   522  	case validNV8, validXV8:
   523  		// TODO: handle V2008
   524  		cat = valid
   525  	}
   526  	return cat
   527  }
   528  
   529  func validateFromPunycode(p *Profile, s string) error {
   530  	if !norm.NFC.IsNormalString(s) {
   531  		return &labelError{s, "V1"}
   532  	}
   533  	for i := 0; i < len(s); {
   534  		v, sz := trie.lookupString(s[i:])
   535  		if c := p.simplify(info(v).category()); c != valid && c != deviation {
   536  			return &labelError{s, "V6"}
   537  		}
   538  		i += sz
   539  	}
   540  	return nil
   541  }
   542  
// UTF-8 encodings of the two joiner characters that validateLabel gives
// special treatment in its context check.
const (
	zwnj = "\u200c" // U+200C ZERO WIDTH NON-JOINER
	zwj  = "\u200d" // U+200D ZERO WIDTH JOINER
)
   547  
// joinState is a state of the state machine that validateLabel runs over the
// runes of a label; it is the first index into joinStates.
type joinState int8

// The states of the joiner state machine. stateStart is the zero value and
// therefore also the target of every transition that joinStates leaves
// unspecified. validateLabel rejects a label that ends in stateFAIL or
// stateAfter.
const (
	stateStart joinState = iota
	stateVirama
	stateBefore
	stateBeforeVirama
	stateAfter
	stateFAIL
)
   558  
// joinStates is the transition table of the joiner state machine. It is
// indexed first by the current state and then by the join type of the rune
// being consumed: joinStates[st][jt] is the next state. Entries that are not
// listed hold the zero value, stateStart. Every entry of the stateFAIL row is
// stateFAIL, so a failure is never left again.
var joinStates = [][numJoinTypes]joinState{
	stateStart: {
		joiningL:   stateBefore,
		joiningD:   stateBefore,
		joinZWNJ:   stateFAIL,
		joinZWJ:    stateFAIL,
		joinVirama: stateVirama,
	},
	stateVirama: {
		joiningL: stateBefore,
		joiningD: stateBefore,
	},
	stateBefore: {
		joiningL:   stateBefore,
		joiningD:   stateBefore,
		joiningT:   stateBefore,
		joinZWNJ:   stateAfter,
		joinZWJ:    stateFAIL,
		joinVirama: stateBeforeVirama,
	},
	stateBeforeVirama: {
		joiningL: stateBefore,
		joiningD: stateBefore,
		joiningT: stateBefore,
	},
	stateAfter: {
		joiningL:   stateFAIL,
		joiningD:   stateBefore,
		joiningT:   stateAfter,
		joiningR:   stateStart,
		joinZWNJ:   stateFAIL,
		joinZWJ:    stateFAIL,
		joinVirama: stateAfter, // no-op as we can't accept joiners here
	},
	stateFAIL: {
		0:          stateFAIL,
		joiningL:   stateFAIL,
		joiningD:   stateFAIL,
		joiningT:   stateFAIL,
		joiningR:   stateFAIL,
		joinZWNJ:   stateFAIL,
		joinZWJ:    stateFAIL,
		joinVirama: stateFAIL,
	},
}
   604  
   605  // validateLabel validates the criteria from Section 4.1. Item 1, 4, and 6 are
   606  // already implicitly satisfied by the overall implementation.
   607  func (p *Profile) validateLabel(s string) error {
   608  	if s == "" {
   609  		if p.verifyDNSLength {
   610  			return &labelError{s, "A4"}
   611  		}
   612  		return nil
   613  	}
   614  	if p.bidirule != nil && !p.bidirule(s) {
   615  		return &labelError{s, "B"}
   616  	}
   617  	if !p.validateLabels {
   618  		return nil
   619  	}
   620  	trie := p.trie // p.validateLabels is only set if trie is set.
   621  	if len(s) > 4 && s[2] == '-' && s[3] == '-' {
   622  		return &labelError{s, "V2"}
   623  	}
   624  	if s[0] == '-' || s[len(s)-1] == '-' {
   625  		return &labelError{s, "V3"}
   626  	}
   627  	// TODO: merge the use of this in the trie.
   628  	v, sz := trie.lookupString(s)
   629  	x := info(v)
   630  	if x.isModifier() {
   631  		return &labelError{s, "V5"}
   632  	}
   633  	// Quickly return in the absence of zero-width (non) joiners.
   634  	if strings.Index(s, zwj) == -1 && strings.Index(s, zwnj) == -1 {
   635  		return nil
   636  	}
   637  	st := stateStart
   638  	for i := 0; ; {
   639  		jt := x.joinType()
   640  		if s[i:i+sz] == zwj {
   641  			jt = joinZWJ
   642  		} else if s[i:i+sz] == zwnj {
   643  			jt = joinZWNJ
   644  		}
   645  		st = joinStates[st][jt]
   646  		if x.isViramaModifier() {
   647  			st = joinStates[st][joinVirama]
   648  		}
   649  		if i += sz; i == len(s) {
   650  			break
   651  		}
   652  		v, sz = trie.lookupString(s[i:])
   653  		x = info(v)
   654  	}
   655  	if st == stateFAIL || st == stateAfter {
   656  		return &labelError{s, "C"}
   657  	}
   658  	return nil
   659  }
   660  
   661  func ascii(s string) bool {
   662  	for i := 0; i < len(s); i++ {
   663  		if s[i] >= utf8.RuneSelf {
   664  			return false
   665  		}
   666  	}
   667  	return true
   668  }