github.com/Andyfoo/golang/x/net@v0.0.0-20190901054642-57c1bf301704/idna/idna.go (about)

     1  // Code generated by running "go generate" in github.com/Andyfoo/golang/x/text. DO NOT EDIT.
     2  
     3  // Copyright 2016 The Go Authors. All rights reserved.
     4  // Use of this source code is governed by a BSD-style
     5  // license that can be found in the LICENSE file.
     6  
     7  // Package idna implements IDNA2008 using the compatibility processing
     8  // defined by UTS (Unicode Technical Standard) #46, which defines a standard to
     9  // deal with the transition from IDNA2003.
    10  //
    11  // IDNA2008 (Internationalized Domain Names for Applications), is defined in RFC
    12  // 5890, RFC 5891, RFC 5892, RFC 5893 and RFC 5894.
    13  // UTS #46 is defined in http://www.unicode.org/reports/tr46.
    14  // See http://unicode.org/cldr/utility/idna.jsp for a visualization of the
    15  // differences between these two standards.
    16  package idna // import "github.com/Andyfoo/golang/x/net/idna"
    17  
    18  import (
    19  	"fmt"
    20  	"strings"
    21  	"unicode/utf8"
    22  
    23  	"github.com/Andyfoo/golang/x/text/secure/bidirule"
    24  	"github.com/Andyfoo/golang/x/text/unicode/bidi"
    25  	"github.com/Andyfoo/golang/x/text/unicode/norm"
    26  )
    27  
    28  // NOTE: Unlike common practice in Go APIs, the functions will return a
    29  // sanitized domain name in case of errors. Browsers sometimes use a partially
    30  // evaluated string as lookup.
    31  // TODO: the current error handling is, in my opinion, the least opinionated.
    32  // Other strategies are also viable, though:
    33  // Option 1) Return an empty string in case of error, but allow the user to
    34  //    specify explicitly which errors to ignore.
    35  // Option 2) Return the partially evaluated string if it is itself a valid
    36  //    string, otherwise return the empty string in case of error.
    37  // Option 3) Option 1 and 2.
    38  // Option 4) Always return an empty string for now and implement Option 1 as
    39  //    needed, and document that the return string may not be empty in case of
    40  //    error in the future.
    41  // I think Option 1 is best, but it is quite opinionated.
    42  
    43  // ToASCII is a wrapper for Punycode.ToASCII.
    44  func ToASCII(s string) (string, error) {
    45  	return Punycode.process(s, true)
    46  }
    47  
    48  // ToUnicode is a wrapper for Punycode.ToUnicode.
    49  func ToUnicode(s string) (string, error) {
    50  	return Punycode.process(s, false)
    51  }
    52  
    53  // An Option configures a Profile at creation time.
    54  type Option func(*options)
    55  
    56  // Transitional sets a Profile to use the Transitional mapping as defined in UTS
    57  // #46. This will cause, for example, "ß" to be mapped to "ss". Using the
    58  // transitional mapping provides a compromise between IDNA2003 and IDNA2008
    59  // compatibility. It is used by most browsers when resolving domain names. This
    60  // option is only meaningful if combined with MapForLookup.
    61  func Transitional(transitional bool) Option {
    62  	return func(o *options) { o.transitional = true }
    63  }
    64  
    65  // VerifyDNSLength sets whether a Profile should fail if any of the IDN parts
    66  // are longer than allowed by the RFC.
    67  func VerifyDNSLength(verify bool) Option {
    68  	return func(o *options) { o.verifyDNSLength = verify }
    69  }
    70  
    71  // RemoveLeadingDots removes leading label separators. Leading runes that map to
    72  // dots, such as U+3002 IDEOGRAPHIC FULL STOP, are removed as well.
    73  //
    74  // This is the behavior suggested by the UTS #46 and is adopted by some
    75  // browsers.
    76  func RemoveLeadingDots(remove bool) Option {
    77  	return func(o *options) { o.removeLeadingDots = remove }
    78  }
    79  
    80  // ValidateLabels sets whether to check the mandatory label validation criteria
    81  // as defined in Section 5.4 of RFC 5891. This includes testing for correct use
    82  // of hyphens ('-'), normalization, validity of runes, and the context rules.
    83  func ValidateLabels(enable bool) Option {
    84  	return func(o *options) {
    85  		// Don't override existing mappings, but set one that at least checks
    86  		// normalization if it is not set.
    87  		if o.mapping == nil && enable {
    88  			o.mapping = normalize
    89  		}
    90  		o.trie = trie
    91  		o.validateLabels = enable
    92  		o.fromPuny = validateFromPunycode
    93  	}
    94  }
    95  
    96  // StrictDomainName limits the set of permissible ASCII characters to those
    97  // allowed in domain names as defined in RFC 1034 (A-Z, a-z, 0-9 and the
    98  // hyphen). This is set by default for MapForLookup and ValidateForRegistration.
    99  //
   100  // This option is useful, for instance, for browsers that allow characters
   101  // outside this range, for example a '_' (U+005F LOW LINE). See
   102  // http://www.rfc-editor.org/std/std3.txt for more details This option
   103  // corresponds to the UseSTD3ASCIIRules option in UTS #46.
   104  func StrictDomainName(use bool) Option {
   105  	return func(o *options) {
   106  		o.trie = trie
   107  		o.useSTD3Rules = use
   108  		o.fromPuny = validateFromPunycode
   109  	}
   110  }
   111  
   112  // NOTE: the following options pull in tables. The tables should not be linked
   113  // in as long as the options are not used.
   114  
   115  // BidiRule enables the Bidi rule as defined in RFC 5893. Any application
   116  // that relies on proper validation of labels should include this rule.
   117  func BidiRule() Option {
   118  	return func(o *options) { o.bidirule = bidirule.ValidString }
   119  }
   120  
   121  // ValidateForRegistration sets validation options to verify that a given IDN is
   122  // properly formatted for registration as defined by Section 4 of RFC 5891.
   123  func ValidateForRegistration() Option {
   124  	return func(o *options) {
   125  		o.mapping = validateRegistration
   126  		StrictDomainName(true)(o)
   127  		ValidateLabels(true)(o)
   128  		VerifyDNSLength(true)(o)
   129  		BidiRule()(o)
   130  	}
   131  }
   132  
   133  // MapForLookup sets validation and mapping options such that a given IDN is
   134  // transformed for domain name lookup according to the requirements set out in
   135  // Section 5 of RFC 5891. The mappings follow the recommendations of RFC 5894,
   136  // RFC 5895 and UTS 46. It does not add the Bidi Rule. Use the BidiRule option
   137  // to add this check.
   138  //
   139  // The mappings include normalization and mapping case, width and other
   140  // compatibility mappings.
   141  func MapForLookup() Option {
   142  	return func(o *options) {
   143  		o.mapping = validateAndMap
   144  		StrictDomainName(true)(o)
   145  		ValidateLabels(true)(o)
   146  	}
   147  }
   148  
   149  type options struct {
   150  	transitional      bool
   151  	useSTD3Rules      bool
   152  	validateLabels    bool
   153  	verifyDNSLength   bool
   154  	removeLeadingDots bool
   155  
   156  	trie *idnaTrie
   157  
   158  	// fromPuny calls validation rules when converting A-labels to U-labels.
   159  	fromPuny func(p *Profile, s string) error
   160  
   161  	// mapping implements a validation and mapping step as defined in RFC 5895
   162  	// or UTS 46, tailored to, for example, domain registration or lookup.
   163  	mapping func(p *Profile, s string) (mapped string, isBidi bool, err error)
   164  
   165  	// bidirule, if specified, checks whether s conforms to the Bidi Rule
   166  	// defined in RFC 5893.
   167  	bidirule func(s string) bool
   168  }
   169  
   170  // A Profile defines the configuration of an IDNA mapper.
   171  type Profile struct {
   172  	options
   173  }
   174  
   175  func apply(o *options, opts []Option) {
   176  	for _, f := range opts {
   177  		f(o)
   178  	}
   179  }
   180  
   181  // New creates a new Profile.
   182  //
   183  // With no options, the returned Profile is the most permissive and equals the
   184  // Punycode Profile. Options can be passed to further restrict the Profile. The
   185  // MapForLookup and ValidateForRegistration options set a collection of options,
   186  // for lookup and registration purposes respectively, which can be tailored by
   187  // adding more fine-grained options, where later options override earlier
   188  // options.
   189  func New(o ...Option) *Profile {
   190  	p := &Profile{}
   191  	apply(&p.options, o)
   192  	return p
   193  }
   194  
   195  // ToASCII converts a domain or domain label to its ASCII form. For example,
   196  // ToASCII("bücher.example.com") is "xn--bcher-kva.example.com", and
   197  // ToASCII("golang") is "golang". If an error is encountered it will return
   198  // an error and a (partially) processed result.
   199  func (p *Profile) ToASCII(s string) (string, error) {
   200  	return p.process(s, true)
   201  }
   202  
   203  // ToUnicode converts a domain or domain label to its Unicode form. For example,
   204  // ToUnicode("xn--bcher-kva.example.com") is "bücher.example.com", and
   205  // ToUnicode("golang") is "golang". If an error is encountered it will return
   206  // an error and a (partially) processed result.
   207  func (p *Profile) ToUnicode(s string) (string, error) {
   208  	pp := *p
   209  	pp.transitional = false
   210  	return pp.process(s, false)
   211  }
   212  
   213  // String reports a string with a description of the profile for debugging
   214  // purposes. The string format may change with different versions.
   215  func (p *Profile) String() string {
   216  	s := ""
   217  	if p.transitional {
   218  		s = "Transitional"
   219  	} else {
   220  		s = "NonTransitional"
   221  	}
   222  	if p.useSTD3Rules {
   223  		s += ":UseSTD3Rules"
   224  	}
   225  	if p.validateLabels {
   226  		s += ":ValidateLabels"
   227  	}
   228  	if p.verifyDNSLength {
   229  		s += ":VerifyDNSLength"
   230  	}
   231  	return s
   232  }
   233  
   234  var (
   235  	// Punycode is a Profile that does raw punycode processing with a minimum
   236  	// of validation.
   237  	Punycode *Profile = punycode
   238  
   239  	// Lookup is the recommended profile for looking up domain names, according
   240  	// to Section 5 of RFC 5891. The exact configuration of this profile may
   241  	// change over time.
   242  	Lookup *Profile = lookup
   243  
   244  	// Display is the recommended profile for displaying domain names.
   245  	// The configuration of this profile may change over time.
   246  	Display *Profile = display
   247  
   248  	// Registration is the recommended profile for checking whether a given
   249  	// IDN is valid for registration, according to Section 4 of RFC 5891.
   250  	Registration *Profile = registration
   251  
   252  	punycode = &Profile{}
   253  	lookup   = &Profile{options{
   254  		transitional:   true,
   255  		useSTD3Rules:   true,
   256  		validateLabels: true,
   257  		trie:           trie,
   258  		fromPuny:       validateFromPunycode,
   259  		mapping:        validateAndMap,
   260  		bidirule:       bidirule.ValidString,
   261  	}}
   262  	display = &Profile{options{
   263  		useSTD3Rules:   true,
   264  		validateLabels: true,
   265  		trie:           trie,
   266  		fromPuny:       validateFromPunycode,
   267  		mapping:        validateAndMap,
   268  		bidirule:       bidirule.ValidString,
   269  	}}
   270  	registration = &Profile{options{
   271  		useSTD3Rules:    true,
   272  		validateLabels:  true,
   273  		verifyDNSLength: true,
   274  		trie:            trie,
   275  		fromPuny:        validateFromPunycode,
   276  		mapping:         validateRegistration,
   277  		bidirule:        bidirule.ValidString,
   278  	}}
   279  
   280  	// TODO: profiles
   281  	// Register: recommended for approving domain names: don't do any mappings
   282  	// but rather reject on invalid input. Bundle or block deviation characters.
   283  )
   284  
   285  type labelError struct{ label, code_ string }
   286  
   287  func (e labelError) code() string { return e.code_ }
   288  func (e labelError) Error() string {
   289  	return fmt.Sprintf("idna: invalid label %q", e.label)
   290  }
   291  
   292  type runeError rune
   293  
   294  func (e runeError) code() string { return "P1" }
   295  func (e runeError) Error() string {
   296  	return fmt.Sprintf("idna: disallowed rune %U", e)
   297  }
   298  
   299  // process implements the algorithm described in section 4 of UTS #46,
   300  // see http://www.unicode.org/reports/tr46.
   301  func (p *Profile) process(s string, toASCII bool) (string, error) {
   302  	var err error
   303  	var isBidi bool
   304  	if p.mapping != nil {
   305  		s, isBidi, err = p.mapping(p, s)
   306  	}
   307  	// Remove leading empty labels.
   308  	if p.removeLeadingDots {
   309  		for ; len(s) > 0 && s[0] == '.'; s = s[1:] {
   310  		}
   311  	}
   312  	// TODO: allow for a quick check of the tables data.
   313  	// It seems like we should only create this error on ToASCII, but the
   314  	// UTS 46 conformance tests suggests we should always check this.
   315  	if err == nil && p.verifyDNSLength && s == "" {
   316  		err = &labelError{s, "A4"}
   317  	}
   318  	labels := labelIter{orig: s}
   319  	for ; !labels.done(); labels.next() {
   320  		label := labels.label()
   321  		if label == "" {
   322  			// Empty labels are not okay. The label iterator skips the last
   323  			// label if it is empty.
   324  			if err == nil && p.verifyDNSLength {
   325  				err = &labelError{s, "A4"}
   326  			}
   327  			continue
   328  		}
   329  		if strings.HasPrefix(label, acePrefix) {
   330  			u, err2 := decode(label[len(acePrefix):])
   331  			if err2 != nil {
   332  				if err == nil {
   333  					err = err2
   334  				}
   335  				// Spec says keep the old label.
   336  				continue
   337  			}
   338  			isBidi = isBidi || bidirule.DirectionString(u) != bidi.LeftToRight
   339  			labels.set(u)
   340  			if err == nil && p.validateLabels {
   341  				err = p.fromPuny(p, u)
   342  			}
   343  			if err == nil {
   344  				// This should be called on NonTransitional, according to the
   345  				// spec, but that currently does not have any effect. Use the
   346  				// original profile to preserve options.
   347  				err = p.validateLabel(u)
   348  			}
   349  		} else if err == nil {
   350  			err = p.validateLabel(label)
   351  		}
   352  	}
   353  	if isBidi && p.bidirule != nil && err == nil {
   354  		for labels.reset(); !labels.done(); labels.next() {
   355  			if !p.bidirule(labels.label()) {
   356  				err = &labelError{s, "B"}
   357  				break
   358  			}
   359  		}
   360  	}
   361  	if toASCII {
   362  		for labels.reset(); !labels.done(); labels.next() {
   363  			label := labels.label()
   364  			if !ascii(label) {
   365  				a, err2 := encode(acePrefix, label)
   366  				if err == nil {
   367  					err = err2
   368  				}
   369  				label = a
   370  				labels.set(a)
   371  			}
   372  			n := len(label)
   373  			if p.verifyDNSLength && err == nil && (n == 0 || n > 63) {
   374  				err = &labelError{label, "A4"}
   375  			}
   376  		}
   377  	}
   378  	s = labels.result()
   379  	if toASCII && p.verifyDNSLength && err == nil {
   380  		// Compute the length of the domain name minus the root label and its dot.
   381  		n := len(s)
   382  		if n > 0 && s[n-1] == '.' {
   383  			n--
   384  		}
   385  		if len(s) < 1 || n > 253 {
   386  			err = &labelError{s, "A4"}
   387  		}
   388  	}
   389  	return s, err
   390  }
   391  
   392  func normalize(p *Profile, s string) (mapped string, isBidi bool, err error) {
   393  	// TODO: consider first doing a quick check to see if any of these checks
   394  	// need to be done. This will make it slower in the general case, but
   395  	// faster in the common case.
   396  	mapped = norm.NFC.String(s)
   397  	isBidi = bidirule.DirectionString(mapped) == bidi.RightToLeft
   398  	return mapped, isBidi, nil
   399  }
   400  
   401  func validateRegistration(p *Profile, s string) (idem string, bidi bool, err error) {
   402  	// TODO: filter need for normalization in loop below.
   403  	if !norm.NFC.IsNormalString(s) {
   404  		return s, false, &labelError{s, "V1"}
   405  	}
   406  	for i := 0; i < len(s); {
   407  		v, sz := trie.lookupString(s[i:])
   408  		if sz == 0 {
   409  			return s, bidi, runeError(utf8.RuneError)
   410  		}
   411  		bidi = bidi || info(v).isBidi(s[i:])
   412  		// Copy bytes not copied so far.
   413  		switch p.simplify(info(v).category()) {
   414  		// TODO: handle the NV8 defined in the Unicode idna data set to allow
   415  		// for strict conformance to IDNA2008.
   416  		case valid, deviation:
   417  		case disallowed, mapped, unknown, ignored:
   418  			r, _ := utf8.DecodeRuneInString(s[i:])
   419  			return s, bidi, runeError(r)
   420  		}
   421  		i += sz
   422  	}
   423  	return s, bidi, nil
   424  }
   425  
   426  func (c info) isBidi(s string) bool {
   427  	if !c.isMapped() {
   428  		return c&attributesMask == rtl
   429  	}
   430  	// TODO: also store bidi info for mapped data. This is possible, but a bit
   431  	// cumbersome and not for the common case.
   432  	p, _ := bidi.LookupString(s)
   433  	switch p.Class() {
   434  	case bidi.R, bidi.AL, bidi.AN:
   435  		return true
   436  	}
   437  	return false
   438  }
   439  
   440  func validateAndMap(p *Profile, s string) (vm string, bidi bool, err error) {
   441  	var (
   442  		b []byte
   443  		k int
   444  	)
   445  	// combinedInfoBits contains the or-ed bits of all runes. We use this
   446  	// to derive the mayNeedNorm bit later. This may trigger normalization
   447  	// overeagerly, but it will not do so in the common case. The end result
   448  	// is another 10% saving on BenchmarkProfile for the common case.
   449  	var combinedInfoBits info
   450  	for i := 0; i < len(s); {
   451  		v, sz := trie.lookupString(s[i:])
   452  		if sz == 0 {
   453  			b = append(b, s[k:i]...)
   454  			b = append(b, "\ufffd"...)
   455  			k = len(s)
   456  			if err == nil {
   457  				err = runeError(utf8.RuneError)
   458  			}
   459  			break
   460  		}
   461  		combinedInfoBits |= info(v)
   462  		bidi = bidi || info(v).isBidi(s[i:])
   463  		start := i
   464  		i += sz
   465  		// Copy bytes not copied so far.
   466  		switch p.simplify(info(v).category()) {
   467  		case valid:
   468  			continue
   469  		case disallowed:
   470  			if err == nil {
   471  				r, _ := utf8.DecodeRuneInString(s[start:])
   472  				err = runeError(r)
   473  			}
   474  			continue
   475  		case mapped, deviation:
   476  			b = append(b, s[k:start]...)
   477  			b = info(v).appendMapping(b, s[start:i])
   478  		case ignored:
   479  			b = append(b, s[k:start]...)
   480  			// drop the rune
   481  		case unknown:
   482  			b = append(b, s[k:start]...)
   483  			b = append(b, "\ufffd"...)
   484  		}
   485  		k = i
   486  	}
   487  	if k == 0 {
   488  		// No changes so far.
   489  		if combinedInfoBits&mayNeedNorm != 0 {
   490  			s = norm.NFC.String(s)
   491  		}
   492  	} else {
   493  		b = append(b, s[k:]...)
   494  		if norm.NFC.QuickSpan(b) != len(b) {
   495  			b = norm.NFC.Bytes(b)
   496  		}
   497  		// TODO: the punycode converters require strings as input.
   498  		s = string(b)
   499  	}
   500  	return s, bidi, err
   501  }
   502  
   503  // A labelIter allows iterating over domain name labels.
   504  type labelIter struct {
   505  	orig     string
   506  	slice    []string
   507  	curStart int
   508  	curEnd   int
   509  	i        int
   510  }
   511  
   512  func (l *labelIter) reset() {
   513  	l.curStart = 0
   514  	l.curEnd = 0
   515  	l.i = 0
   516  }
   517  
   518  func (l *labelIter) done() bool {
   519  	return l.curStart >= len(l.orig)
   520  }
   521  
   522  func (l *labelIter) result() string {
   523  	if l.slice != nil {
   524  		return strings.Join(l.slice, ".")
   525  	}
   526  	return l.orig
   527  }
   528  
   529  func (l *labelIter) label() string {
   530  	if l.slice != nil {
   531  		return l.slice[l.i]
   532  	}
   533  	p := strings.IndexByte(l.orig[l.curStart:], '.')
   534  	l.curEnd = l.curStart + p
   535  	if p == -1 {
   536  		l.curEnd = len(l.orig)
   537  	}
   538  	return l.orig[l.curStart:l.curEnd]
   539  }
   540  
   541  // next sets the value to the next label. It skips the last label if it is empty.
   542  func (l *labelIter) next() {
   543  	l.i++
   544  	if l.slice != nil {
   545  		if l.i >= len(l.slice) || l.i == len(l.slice)-1 && l.slice[l.i] == "" {
   546  			l.curStart = len(l.orig)
   547  		}
   548  	} else {
   549  		l.curStart = l.curEnd + 1
   550  		if l.curStart == len(l.orig)-1 && l.orig[l.curStart] == '.' {
   551  			l.curStart = len(l.orig)
   552  		}
   553  	}
   554  }
   555  
   556  func (l *labelIter) set(s string) {
   557  	if l.slice == nil {
   558  		l.slice = strings.Split(l.orig, ".")
   559  	}
   560  	l.slice[l.i] = s
   561  }
   562  
// acePrefix is the ASCII Compatible Encoding prefix. process treats a label
// that starts with it as punycode to be decoded, and hands it to encode when
// converting a non-ASCII label to its ASCII form.
const acePrefix = "xn--"
   565  
   566  func (p *Profile) simplify(cat category) category {
   567  	switch cat {
   568  	case disallowedSTD3Mapped:
   569  		if p.useSTD3Rules {
   570  			cat = disallowed
   571  		} else {
   572  			cat = mapped
   573  		}
   574  	case disallowedSTD3Valid:
   575  		if p.useSTD3Rules {
   576  			cat = disallowed
   577  		} else {
   578  			cat = valid
   579  		}
   580  	case deviation:
   581  		if !p.transitional {
   582  			cat = valid
   583  		}
   584  	case validNV8, validXV8:
   585  		// TODO: handle V2008
   586  		cat = valid
   587  	}
   588  	return cat
   589  }
   590  
   591  func validateFromPunycode(p *Profile, s string) error {
   592  	if !norm.NFC.IsNormalString(s) {
   593  		return &labelError{s, "V1"}
   594  	}
   595  	// TODO: detect whether string may have to be normalized in the following
   596  	// loop.
   597  	for i := 0; i < len(s); {
   598  		v, sz := trie.lookupString(s[i:])
   599  		if sz == 0 {
   600  			return runeError(utf8.RuneError)
   601  		}
   602  		if c := p.simplify(info(v).category()); c != valid && c != deviation {
   603  			return &labelError{s, "V6"}
   604  		}
   605  		i += sz
   606  	}
   607  	return nil
   608  }
   609  
// The zero-width joiner characters, as strings so they can be compared against
// substrings of a label. validateLabel only runs its joiner state machine when
// a label contains at least one of them.
const (
	zwnj = "\u200c" // U+200C ZERO WIDTH NON-JOINER
	zwj  = "\u200d" // U+200D ZERO WIDTH JOINER
)
   614  
// joinState is a state of the state machine that validateLabel runs over the
// joinStates table for labels containing a zero-width (non) joiner.
type joinState int8

// The states of the joiner state machine. validateLabel starts in stateStart
// and rejects a label whose final state is stateFAIL or stateAfter.
const (
	stateStart joinState = iota
	stateVirama
	stateBefore
	stateBeforeVirama
	stateAfter
	stateFAIL
)
   625  
// joinStates is the transition table of the joiner state machine. It is
// indexed first by the current state and then by the join type of the rune
// being consumed, and yields the next state. validateLabel additionally
// applies the joinVirama column after the regular transition when the rune is
// a virama modifier.
//
// Join types that are not listed in a row take the zero value, which is
// stateStart.
var joinStates = [][numJoinTypes]joinState{
	stateStart: {
		joiningL:   stateBefore,
		joiningD:   stateBefore,
		joinZWNJ:   stateFAIL,
		joinZWJ:    stateFAIL,
		joinVirama: stateVirama,
	},
	stateVirama: {
		joiningL: stateBefore,
		joiningD: stateBefore,
	},
	stateBefore: {
		joiningL:   stateBefore,
		joiningD:   stateBefore,
		joiningT:   stateBefore,
		joinZWNJ:   stateAfter,
		joinZWJ:    stateFAIL,
		joinVirama: stateBeforeVirama,
	},
	stateBeforeVirama: {
		joiningL: stateBefore,
		joiningD: stateBefore,
		joiningT: stateBefore,
	},
	stateAfter: {
		joiningL:   stateFAIL,
		joiningD:   stateBefore,
		joiningT:   stateAfter,
		joiningR:   stateStart,
		joinZWNJ:   stateFAIL,
		joinZWJ:    stateFAIL,
		joinVirama: stateAfter, // no-op as we can't accept joiners here
	},
	// stateFAIL is absorbing: every input leads back to stateFAIL.
	stateFAIL: {
		0:          stateFAIL,
		joiningL:   stateFAIL,
		joiningD:   stateFAIL,
		joiningT:   stateFAIL,
		joiningR:   stateFAIL,
		joinZWNJ:   stateFAIL,
		joinZWJ:    stateFAIL,
		joinVirama: stateFAIL,
	},
}
   671  
   672  // validateLabel validates the criteria from Section 4.1. Item 1, 4, and 6 are
   673  // already implicitly satisfied by the overall implementation.
   674  func (p *Profile) validateLabel(s string) (err error) {
   675  	if s == "" {
   676  		if p.verifyDNSLength {
   677  			return &labelError{s, "A4"}
   678  		}
   679  		return nil
   680  	}
   681  	if !p.validateLabels {
   682  		return nil
   683  	}
   684  	trie := p.trie // p.validateLabels is only set if trie is set.
   685  	if len(s) > 4 && s[2] == '-' && s[3] == '-' {
   686  		return &labelError{s, "V2"}
   687  	}
   688  	if s[0] == '-' || s[len(s)-1] == '-' {
   689  		return &labelError{s, "V3"}
   690  	}
   691  	// TODO: merge the use of this in the trie.
   692  	v, sz := trie.lookupString(s)
   693  	x := info(v)
   694  	if x.isModifier() {
   695  		return &labelError{s, "V5"}
   696  	}
   697  	// Quickly return in the absence of zero-width (non) joiners.
   698  	if strings.Index(s, zwj) == -1 && strings.Index(s, zwnj) == -1 {
   699  		return nil
   700  	}
   701  	st := stateStart
   702  	for i := 0; ; {
   703  		jt := x.joinType()
   704  		if s[i:i+sz] == zwj {
   705  			jt = joinZWJ
   706  		} else if s[i:i+sz] == zwnj {
   707  			jt = joinZWNJ
   708  		}
   709  		st = joinStates[st][jt]
   710  		if x.isViramaModifier() {
   711  			st = joinStates[st][joinVirama]
   712  		}
   713  		if i += sz; i == len(s) {
   714  			break
   715  		}
   716  		v, sz = trie.lookupString(s[i:])
   717  		x = info(v)
   718  	}
   719  	if st == stateFAIL || st == stateAfter {
   720  		return &labelError{s, "C"}
   721  	}
   722  	return nil
   723  }
   724  
   725  func ascii(s string) bool {
   726  	for i := 0; i < len(s); i++ {
   727  		if s[i] >= utf8.RuneSelf {
   728  			return false
   729  		}
   730  	}
   731  	return true
   732  }