github.com/liquid-dev/text@v0.3.3-liquid/internal/export/idna/idna10.0.0.go (about)

     1  // Copyright 2016 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  // +build go1.10
     6  //go:generate go run gen.go gen_trieval.go gen_common.go
     7  
     8  // Package idna implements IDNA2008 using the compatibility processing
     9  // defined by UTS (Unicode Technical Standard) #46, which defines a standard to
    10  // deal with the transition from IDNA2003.
    11  //
    12  // IDNA2008 (Internationalized Domain Names for Applications), is defined in RFC
    13  // 5890, RFC 5891, RFC 5892, RFC 5893 and RFC 5894.
    14  // UTS #46 is defined in https://www.unicode.org/reports/tr46.
    15  // See https://unicode.org/cldr/utility/idna.jsp for a visualization of the
    16  // differences between these two standards.
    17  package idna // import "github.com/liquid-dev/text/internal/export/idna"
    18  
    19  import (
    20  	"fmt"
    21  	"strings"
    22  	"unicode/utf8"
    23  
    24  	"github.com/liquid-dev/text/secure/bidirule"
    25  	"github.com/liquid-dev/text/unicode/bidi"
    26  	"github.com/liquid-dev/text/unicode/norm"
    27  )
    28  
    29  // NOTE: Unlike common practice in Go APIs, the functions will return a
    30  // sanitized domain name in case of errors. Browsers sometimes use a partially
    31  // evaluated string as lookup.
    32  // TODO: the current error handling is, in my opinion, the least opinionated.
    33  // Other strategies are also viable, though:
    34  // Option 1) Return an empty string in case of error, but allow the user to
    35  //    specify explicitly which errors to ignore.
    36  // Option 2) Return the partially evaluated string if it is itself a valid
    37  //    string, otherwise return the empty string in case of error.
    38  // Option 3) Option 1 and 2.
    39  // Option 4) Always return an empty string for now and implement Option 1 as
    40  //    needed, and document that the return string may not be empty in case of
    41  //    error in the future.
    42  // I think Option 1 is best, but it is quite opinionated.
    43  
    44  // ToASCII is a wrapper for Punycode.ToASCII.
    45  func ToASCII(s string) (string, error) {
    46  	return Punycode.process(s, true)
    47  }
    48  
    49  // ToUnicode is a wrapper for Punycode.ToUnicode.
    50  func ToUnicode(s string) (string, error) {
    51  	return Punycode.process(s, false)
    52  }
    53  
// An Option configures a Profile at creation time.
//
// An Option is a function that mutates the options of the Profile being
// built. New applies the Options it is given in argument order, so a later
// Option can override what an earlier one set.
type Option func(*options)
    56  
    57  // Transitional sets a Profile to use the Transitional mapping as defined in UTS
    58  // #46. This will cause, for example, "ß" to be mapped to "ss". Using the
    59  // transitional mapping provides a compromise between IDNA2003 and IDNA2008
    60  // compatibility. It is used by most browsers when resolving domain names. This
    61  // option is only meaningful if combined with MapForLookup.
    62  func Transitional(transitional bool) Option {
    63  	return func(o *options) { o.transitional = true }
    64  }
    65  
    66  // VerifyDNSLength sets whether a Profile should fail if any of the IDN parts
    67  // are longer than allowed by the RFC.
    68  func VerifyDNSLength(verify bool) Option {
    69  	return func(o *options) { o.verifyDNSLength = verify }
    70  }
    71  
    72  // RemoveLeadingDots removes leading label separators. Leading runes that map to
    73  // dots, such as U+3002 IDEOGRAPHIC FULL STOP, are removed as well.
    74  //
    75  // This is the behavior suggested by the UTS #46 and is adopted by some
    76  // browsers.
    77  func RemoveLeadingDots(remove bool) Option {
    78  	return func(o *options) { o.removeLeadingDots = remove }
    79  }
    80  
    81  // ValidateLabels sets whether to check the mandatory label validation criteria
    82  // as defined in Section 5.4 of RFC 5891. This includes testing for correct use
    83  // of hyphens ('-'), normalization, validity of runes, and the context rules.
    84  func ValidateLabels(enable bool) Option {
    85  	return func(o *options) {
    86  		// Don't override existing mappings, but set one that at least checks
    87  		// normalization if it is not set.
    88  		if o.mapping == nil && enable {
    89  			o.mapping = normalize
    90  		}
    91  		o.trie = trie
    92  		o.validateLabels = enable
    93  		o.fromPuny = validateFromPunycode
    94  	}
    95  }
    96  
    97  // StrictDomainName limits the set of permissible ASCII characters to those
    98  // allowed in domain names as defined in RFC 1034 (A-Z, a-z, 0-9 and the
    99  // hyphen). This is set by default for MapForLookup and ValidateForRegistration.
   100  //
   101  // This option is useful, for instance, for browsers that allow characters
   102  // outside this range, for example a '_' (U+005F LOW LINE). See
   103  // http://www.rfc-editor.org/std/std3.txt for more details This option
   104  // corresponds to the UseSTD3ASCIIRules option in UTS #46.
   105  func StrictDomainName(use bool) Option {
   106  	return func(o *options) {
   107  		o.trie = trie
   108  		o.useSTD3Rules = use
   109  		o.fromPuny = validateFromPunycode
   110  	}
   111  }
   112  
   113  // NOTE: the following options pull in tables. The tables should not be linked
   114  // in as long as the options are not used.
   115  
   116  // BidiRule enables the Bidi rule as defined in RFC 5893. Any application
   117  // that relies on proper validation of labels should include this rule.
   118  func BidiRule() Option {
   119  	return func(o *options) { o.bidirule = bidirule.ValidString }
   120  }
   121  
   122  // ValidateForRegistration sets validation options to verify that a given IDN is
   123  // properly formatted for registration as defined by Section 4 of RFC 5891.
   124  func ValidateForRegistration() Option {
   125  	return func(o *options) {
   126  		o.mapping = validateRegistration
   127  		StrictDomainName(true)(o)
   128  		ValidateLabels(true)(o)
   129  		VerifyDNSLength(true)(o)
   130  		BidiRule()(o)
   131  	}
   132  }
   133  
   134  // MapForLookup sets validation and mapping options such that a given IDN is
   135  // transformed for domain name lookup according to the requirements set out in
   136  // Section 5 of RFC 5891. The mappings follow the recommendations of RFC 5894,
   137  // RFC 5895 and UTS 46. It does not add the Bidi Rule. Use the BidiRule option
   138  // to add this check.
   139  //
   140  // The mappings include normalization and mapping case, width and other
   141  // compatibility mappings.
   142  func MapForLookup() Option {
   143  	return func(o *options) {
   144  		o.mapping = validateAndMap
   145  		StrictDomainName(true)(o)
   146  		ValidateLabels(true)(o)
   147  	}
   148  }
   149  
// options holds the configuration of a Profile. It is populated by Option
// functions or directly by the predefined profiles in this file.
type options struct {
	// transitional selects transitional processing: simplify keeps the
	// deviation category only when this is set, otherwise deviation
	// characters are treated as valid.
	transitional bool
	// useSTD3Rules makes simplify treat the disallowedSTD3* categories as
	// disallowed instead of mapped/valid.
	useSTD3Rules bool
	// validateLabels enables the per-label checks in validateLabel and the
	// fromPuny check on decoded labels.
	validateLabels bool
	// verifyDNSLength makes process report empty or over-long labels and
	// over-long names with error code "A4".
	verifyDNSLength bool
	// removeLeadingDots makes process strip leading '.' bytes after mapping.
	removeLeadingDots bool

	// trie is the lookup table used by validateLabel.
	trie *idnaTrie

	// fromPuny calls validation rules when converting A-labels to U-labels.
	fromPuny func(p *Profile, s string) error

	// mapping implements a validation and mapping step as defined in RFC 5895
	// or UTS 46, tailored to, for example, domain registration or lookup.
	mapping func(p *Profile, s string) (mapped string, isBidi bool, err error)

	// bidirule, if specified, checks whether s conforms to the Bidi Rule
	// defined in RFC 5893.
	bidirule func(s string) bool
}
   170  
// A Profile defines the configuration of an IDNA mapper.
//
// It embeds options, so all configuration fields are accessible directly on
// the Profile. Use New to build one from Option values, or use one of the
// predefined profiles (Punycode, Lookup, Display, Registration).
type Profile struct {
	options
}
   175  
   176  func apply(o *options, opts []Option) {
   177  	for _, f := range opts {
   178  		f(o)
   179  	}
   180  }
   181  
   182  // New creates a new Profile.
   183  //
   184  // With no options, the returned Profile is the most permissive and equals the
   185  // Punycode Profile. Options can be passed to further restrict the Profile. The
   186  // MapForLookup and ValidateForRegistration options set a collection of options,
   187  // for lookup and registration purposes respectively, which can be tailored by
   188  // adding more fine-grained options, where later options override earlier
   189  // options.
   190  func New(o ...Option) *Profile {
   191  	p := &Profile{}
   192  	apply(&p.options, o)
   193  	return p
   194  }
   195  
   196  // ToASCII converts a domain or domain label to its ASCII form. For example,
   197  // ToASCII("bücher.example.com") is "xn--bcher-kva.example.com", and
   198  // ToASCII("golang") is "golang". If an error is encountered it will return
   199  // an error and a (partially) processed result.
   200  func (p *Profile) ToASCII(s string) (string, error) {
   201  	return p.process(s, true)
   202  }
   203  
   204  // ToUnicode converts a domain or domain label to its Unicode form. For example,
   205  // ToUnicode("xn--bcher-kva.example.com") is "bücher.example.com", and
   206  // ToUnicode("golang") is "golang". If an error is encountered it will return
   207  // an error and a (partially) processed result.
   208  func (p *Profile) ToUnicode(s string) (string, error) {
   209  	pp := *p
   210  	pp.transitional = false
   211  	return pp.process(s, false)
   212  }
   213  
   214  // String reports a string with a description of the profile for debugging
   215  // purposes. The string format may change with different versions.
   216  func (p *Profile) String() string {
   217  	s := ""
   218  	if p.transitional {
   219  		s = "Transitional"
   220  	} else {
   221  		s = "NonTransitional"
   222  	}
   223  	if p.useSTD3Rules {
   224  		s += ":UseSTD3Rules"
   225  	}
   226  	if p.validateLabels {
   227  		s += ":ValidateLabels"
   228  	}
   229  	if p.verifyDNSLength {
   230  		s += ":VerifyDNSLength"
   231  	}
   232  	return s
   233  }
   234  
// Predefined profiles. The exported variables alias unexported values that
// are initialized with struct literals rather than through Option functions.
var (
	// Punycode is a Profile that does raw punycode processing with a minimum
	// of validation.
	Punycode *Profile = punycode

	// Lookup is the recommended profile for looking up domain names, according
	// to Section 5 of RFC 5891. The exact configuration of this profile may
	// change over time.
	Lookup *Profile = lookup

	// Display is the recommended profile for displaying domain names.
	// The configuration of this profile may change over time.
	Display *Profile = display

	// Registration is the recommended profile for checking whether a given
	// IDN is valid for registration, according to Section 4 of RFC 5891.
	Registration *Profile = registration

	// punycode has no options set: no mapping, no validation.
	punycode = &Profile{}
	// lookup maps and validates with transitional processing enabled and the
	// Bidi rule installed.
	lookup   = &Profile{options{
		transitional:   true,
		useSTD3Rules:   true,
		validateLabels: true,
		trie:           trie,
		fromPuny:       validateFromPunycode,
		mapping:        validateAndMap,
		bidirule:       bidirule.ValidString,
	}}
	// display is configured like lookup except that transitional processing
	// is left off.
	display = &Profile{options{
		useSTD3Rules:   true,
		validateLabels: true,
		trie:           trie,
		fromPuny:       validateFromPunycode,
		mapping:        validateAndMap,
		bidirule:       bidirule.ValidString,
	}}
	// registration validates without mapping (validateRegistration) and
	// additionally verifies DNS length limits.
	registration = &Profile{options{
		useSTD3Rules:    true,
		validateLabels:  true,
		verifyDNSLength: true,
		trie:            trie,
		fromPuny:        validateFromPunycode,
		mapping:         validateRegistration,
		bidirule:        bidirule.ValidString,
	}}

	// TODO: profiles
	// Register: recommended for approving domain names: don't do any mappings
	// but rather reject on invalid input. Bundle or block deviation characters.
)
   285  
   286  type labelError struct{ label, code_ string }
   287  
   288  func (e labelError) code() string { return e.code_ }
   289  func (e labelError) Error() string {
   290  	return fmt.Sprintf("idna: invalid label %q", e.label)
   291  }
   292  
   293  type runeError rune
   294  
   295  func (e runeError) code() string { return "P1" }
   296  func (e runeError) Error() string {
   297  	return fmt.Sprintf("idna: disallowed rune %U", e)
   298  }
   299  
// process implements the algorithm described in section 4 of UTS #46,
// see https://www.unicode.org/reports/tr46.
//
// It runs the profile's mapping step (if any) over the whole name, then
// validates each label, decoding labels that carry the ACE prefix. When
// toASCII is true, non-ASCII labels are subsequently punycode-encoded and, if
// verifyDNSLength is set, label and name lengths are checked.
//
// Only the first error encountered is reported; processing continues after an
// error so that the returned string is still the (partially) processed name.
func (p *Profile) process(s string, toASCII bool) (string, error) {
	var err error
	var isBidi bool
	if p.mapping != nil {
		s, isBidi, err = p.mapping(p, s)
	}
	// Remove leading empty labels.
	if p.removeLeadingDots {
		for ; len(s) > 0 && s[0] == '.'; s = s[1:] {
		}
	}
	// TODO: allow for a quick check of the tables data.
	// It seems like we should only create this error on ToASCII, but the
	// UTS 46 conformance tests suggests we should always check this.
	if err == nil && p.verifyDNSLength && s == "" {
		err = &labelError{s, "A4"}
	}
	// First pass: validate every label, replacing ACE labels by their decoded
	// form in the iterator.
	labels := labelIter{orig: s}
	for ; !labels.done(); labels.next() {
		label := labels.label()
		if label == "" {
			// Empty labels are not okay. The label iterator skips the last
			// label if it is empty.
			if err == nil && p.verifyDNSLength {
				err = &labelError{s, "A4"}
			}
			continue
		}
		if strings.HasPrefix(label, acePrefix) {
			u, err2 := decode(label[len(acePrefix):])
			if err2 != nil {
				if err == nil {
					err = err2
				}
				// Spec says keep the old label.
				continue
			}
			// A decoded label that is not purely left-to-right makes the
			// whole name subject to the Bidi rule check below.
			isBidi = isBidi || bidirule.DirectionString(u) != bidi.LeftToRight
			labels.set(u)
			if err == nil && p.validateLabels {
				err = p.fromPuny(p, u)
			}
			if err == nil {
				// This should be called on NonTransitional, according to the
				// spec, but that currently does not have any effect. Use the
				// original profile to preserve options.
				err = p.validateLabel(u)
			}
		} else if err == nil {
			err = p.validateLabel(label)
		}
	}
	// Second pass: apply the Bidi rule to every label, but only if the name
	// contains bidirectional content, a rule is configured and nothing has
	// failed so far.
	if isBidi && p.bidirule != nil && err == nil {
		for labels.reset(); !labels.done(); labels.next() {
			if !p.bidirule(labels.label()) {
				err = &labelError{s, "B"}
				break
			}
		}
	}
	// Third pass (ToASCII only): encode non-ASCII labels with the ACE prefix
	// and check the per-label length limit.
	if toASCII {
		for labels.reset(); !labels.done(); labels.next() {
			label := labels.label()
			if !ascii(label) {
				a, err2 := encode(acePrefix, label)
				if err == nil {
					err = err2
				}
				// The encoder's output replaces the label even if it
				// reported an error.
				label = a
				labels.set(a)
			}
			n := len(label)
			if p.verifyDNSLength && err == nil && (n == 0 || n > 63) {
				err = &labelError{label, "A4"}
			}
		}
	}
	s = labels.result()
	if toASCII && p.verifyDNSLength && err == nil {
		// Compute the length of the domain name minus the root label and its dot.
		n := len(s)
		if n > 0 && s[n-1] == '.' {
			n--
		}
		if len(s) < 1 || n > 253 {
			err = &labelError{s, "A4"}
		}
	}
	return s, err
}
   392  
   393  func normalize(p *Profile, s string) (mapped string, isBidi bool, err error) {
   394  	// TODO: consider first doing a quick check to see if any of these checks
   395  	// need to be done. This will make it slower in the general case, but
   396  	// faster in the common case.
   397  	mapped = norm.NFC.String(s)
   398  	isBidi = bidirule.DirectionString(mapped) == bidi.RightToLeft
   399  	return mapped, isBidi, nil
   400  }
   401  
   402  func validateRegistration(p *Profile, s string) (idem string, bidi bool, err error) {
   403  	// TODO: filter need for normalization in loop below.
   404  	if !norm.NFC.IsNormalString(s) {
   405  		return s, false, &labelError{s, "V1"}
   406  	}
   407  	for i := 0; i < len(s); {
   408  		v, sz := trie.lookupString(s[i:])
   409  		if sz == 0 {
   410  			return s, bidi, runeError(utf8.RuneError)
   411  		}
   412  		bidi = bidi || info(v).isBidi(s[i:])
   413  		// Copy bytes not copied so far.
   414  		switch p.simplify(info(v).category()) {
   415  		// TODO: handle the NV8 defined in the Unicode idna data set to allow
   416  		// for strict conformance to IDNA2008.
   417  		case valid, deviation:
   418  		case disallowed, mapped, unknown, ignored:
   419  			r, _ := utf8.DecodeRuneInString(s[i:])
   420  			return s, bidi, runeError(r)
   421  		}
   422  		i += sz
   423  	}
   424  	return s, bidi, nil
   425  }
   426  
   427  func (c info) isBidi(s string) bool {
   428  	if !c.isMapped() {
   429  		return c&attributesMask == rtl
   430  	}
   431  	// TODO: also store bidi info for mapped data. This is possible, but a bit
   432  	// cumbersome and not for the common case.
   433  	p, _ := bidi.LookupString(s)
   434  	switch p.Class() {
   435  	case bidi.R, bidi.AL, bidi.AN:
   436  		return true
   437  	}
   438  	return false
   439  }
   440  
// validateAndMap is the mapping step used for lookup and display. It walks s
// rune by rune, applies the mapping recorded in the trie for mapped and
// deviation runes, drops ignored runes, substitutes U+FFFD for unknown runes,
// and finally normalizes the result to NFC when needed.
//
// It returns the mapped string, whether a bidirectional rune was seen, and the
// first error encountered (a runeError for a disallowed or malformed rune).
// Disallowed runes are left in the output.
func validateAndMap(p *Profile, s string) (vm string, bidi bool, err error) {
	var (
		// b accumulates the output once a rewrite is needed; k is the index
		// in s up to which input has already been copied to (or dropped
		// from) b. k == 0 means nothing has been rewritten yet.
		b []byte
		k int
	)
	// combinedInfoBits contains the or-ed bits of all runes. We use this
	// to derive the mayNeedNorm bit later. This may trigger normalization
	// overeagerly, but it will not do so in the common case. The end result
	// is another 10% saving on BenchmarkProfile for the common case.
	var combinedInfoBits info
	for i := 0; i < len(s); {
		v, sz := trie.lookupString(s[i:])
		if sz == 0 {
			// The lookup consumed nothing: flush what is pending, emit a
			// replacement character and stop, discarding the rest of s.
			b = append(b, s[k:i]...)
			b = append(b, "\ufffd"...)
			k = len(s)
			if err == nil {
				err = runeError(utf8.RuneError)
			}
			break
		}
		combinedInfoBits |= info(v)
		bidi = bidi || info(v).isBidi(s[i:])
		start := i
		i += sz
		// Copy bytes not copied so far.
		switch p.simplify(info(v).category()) {
		case valid:
			continue
		case disallowed:
			// Record the first offending rune but keep it in the output.
			if err == nil {
				r, _ := utf8.DecodeRuneInString(s[start:])
				err = runeError(r)
			}
			continue
		case mapped, deviation:
			b = append(b, s[k:start]...)
			b = info(v).appendMapping(b, s[start:i])
		case ignored:
			b = append(b, s[k:start]...)
			// drop the rune
		case unknown:
			b = append(b, s[k:start]...)
			b = append(b, "\ufffd"...)
		}
		// Everything up to i is now reflected in b.
		k = i
	}
	if k == 0 {
		// No changes so far.
		if combinedInfoBits&mayNeedNorm != 0 {
			s = norm.NFC.String(s)
		}
	} else {
		// Append the unmodified tail and normalize only if the quick check
		// does not cover the whole buffer.
		b = append(b, s[k:]...)
		if norm.NFC.QuickSpan(b) != len(b) {
			b = norm.NFC.Bytes(b)
		}
		// TODO: the punycode converters require strings as input.
		s = string(b)
	}
	return s, bidi, err
}
   503  
   504  // A labelIter allows iterating over domain name labels.
   505  type labelIter struct {
   506  	orig     string
   507  	slice    []string
   508  	curStart int
   509  	curEnd   int
   510  	i        int
   511  }
   512  
   513  func (l *labelIter) reset() {
   514  	l.curStart = 0
   515  	l.curEnd = 0
   516  	l.i = 0
   517  }
   518  
   519  func (l *labelIter) done() bool {
   520  	return l.curStart >= len(l.orig)
   521  }
   522  
   523  func (l *labelIter) result() string {
   524  	if l.slice != nil {
   525  		return strings.Join(l.slice, ".")
   526  	}
   527  	return l.orig
   528  }
   529  
   530  func (l *labelIter) label() string {
   531  	if l.slice != nil {
   532  		return l.slice[l.i]
   533  	}
   534  	p := strings.IndexByte(l.orig[l.curStart:], '.')
   535  	l.curEnd = l.curStart + p
   536  	if p == -1 {
   537  		l.curEnd = len(l.orig)
   538  	}
   539  	return l.orig[l.curStart:l.curEnd]
   540  }
   541  
   542  // next sets the value to the next label. It skips the last label if it is empty.
   543  func (l *labelIter) next() {
   544  	l.i++
   545  	if l.slice != nil {
   546  		if l.i >= len(l.slice) || l.i == len(l.slice)-1 && l.slice[l.i] == "" {
   547  			l.curStart = len(l.orig)
   548  		}
   549  	} else {
   550  		l.curStart = l.curEnd + 1
   551  		if l.curStart == len(l.orig)-1 && l.orig[l.curStart] == '.' {
   552  			l.curStart = len(l.orig)
   553  		}
   554  	}
   555  }
   556  
   557  func (l *labelIter) set(s string) {
   558  	if l.slice == nil {
   559  		l.slice = strings.Split(l.orig, ".")
   560  	}
   561  	l.slice[l.i] = s
   562  }
   563  
// acePrefix is the ASCII Compatible Encoding prefix. Labels starting with it
// are punycode-decoded by process, and it is prepended when encoding
// non-ASCII labels.
const acePrefix = "xn--"
   566  
   567  func (p *Profile) simplify(cat category) category {
   568  	switch cat {
   569  	case disallowedSTD3Mapped:
   570  		if p.useSTD3Rules {
   571  			cat = disallowed
   572  		} else {
   573  			cat = mapped
   574  		}
   575  	case disallowedSTD3Valid:
   576  		if p.useSTD3Rules {
   577  			cat = disallowed
   578  		} else {
   579  			cat = valid
   580  		}
   581  	case deviation:
   582  		if !p.transitional {
   583  			cat = valid
   584  		}
   585  	case validNV8, validXV8:
   586  		// TODO: handle V2008
   587  		cat = valid
   588  	}
   589  	return cat
   590  }
   591  
   592  func validateFromPunycode(p *Profile, s string) error {
   593  	if !norm.NFC.IsNormalString(s) {
   594  		return &labelError{s, "V1"}
   595  	}
   596  	// TODO: detect whether string may have to be normalized in the following
   597  	// loop.
   598  	for i := 0; i < len(s); {
   599  		v, sz := trie.lookupString(s[i:])
   600  		if sz == 0 {
   601  			return runeError(utf8.RuneError)
   602  		}
   603  		if c := p.simplify(info(v).category()); c != valid && c != deviation {
   604  			return &labelError{s, "V6"}
   605  		}
   606  		i += sz
   607  	}
   608  	return nil
   609  }
   610  
// UTF-8 encodings of the two joiner code points that validateLabel checks
// with the joinStates state machine.
const (
	zwnj = "\u200c" // U+200C ZERO WIDTH NON-JOINER
	zwj  = "\u200d" // U+200D ZERO WIDTH JOINER
)
   615  
// joinState is a state of the state machine that validateLabel runs over the
// runes of a label to check the use of zero-width joiners.
type joinState int8

// States of the joiner state machine. validateLabel rejects a label when the
// machine ends in stateFAIL or stateAfter.
const (
	stateStart joinState = iota
	stateVirama
	stateBefore
	stateBeforeVirama
	stateAfter
	stateFAIL
)

// joinStates is the transition table of the joiner state machine, indexed
// first by the current state and then by the join type of the next rune.
// Entries that are not listed are the zero value, i.e. they lead back to
// stateStart. Once in stateFAIL the machine stays there.
var joinStates = [][numJoinTypes]joinState{
	stateStart: {
		joiningL:   stateBefore,
		joiningD:   stateBefore,
		joinZWNJ:   stateFAIL,
		joinZWJ:    stateFAIL,
		joinVirama: stateVirama,
	},
	stateVirama: {
		joiningL: stateBefore,
		joiningD: stateBefore,
	},
	stateBefore: {
		joiningL:   stateBefore,
		joiningD:   stateBefore,
		joiningT:   stateBefore,
		joinZWNJ:   stateAfter,
		joinZWJ:    stateFAIL,
		joinVirama: stateBeforeVirama,
	},
	stateBeforeVirama: {
		joiningL: stateBefore,
		joiningD: stateBefore,
		joiningT: stateBefore,
	},
	stateAfter: {
		joiningL:   stateFAIL,
		joiningD:   stateBefore,
		joiningT:   stateAfter,
		joiningR:   stateStart,
		joinZWNJ:   stateFAIL,
		joinZWJ:    stateFAIL,
		joinVirama: stateAfter, // no-op as we can't accept joiners here
	},
	stateFAIL: {
		0:          stateFAIL,
		joiningL:   stateFAIL,
		joiningD:   stateFAIL,
		joiningT:   stateFAIL,
		joiningR:   stateFAIL,
		joinZWNJ:   stateFAIL,
		joinZWJ:    stateFAIL,
		joinVirama: stateFAIL,
	},
}
   672  
   673  // validateLabel validates the criteria from Section 4.1. Item 1, 4, and 6 are
   674  // already implicitly satisfied by the overall implementation.
   675  func (p *Profile) validateLabel(s string) (err error) {
   676  	if s == "" {
   677  		if p.verifyDNSLength {
   678  			return &labelError{s, "A4"}
   679  		}
   680  		return nil
   681  	}
   682  	if !p.validateLabels {
   683  		return nil
   684  	}
   685  	trie := p.trie // p.validateLabels is only set if trie is set.
   686  	if len(s) > 4 && s[2] == '-' && s[3] == '-' {
   687  		return &labelError{s, "V2"}
   688  	}
   689  	if s[0] == '-' || s[len(s)-1] == '-' {
   690  		return &labelError{s, "V3"}
   691  	}
   692  	// TODO: merge the use of this in the trie.
   693  	v, sz := trie.lookupString(s)
   694  	x := info(v)
   695  	if x.isModifier() {
   696  		return &labelError{s, "V5"}
   697  	}
   698  	// Quickly return in the absence of zero-width (non) joiners.
   699  	if strings.Index(s, zwj) == -1 && strings.Index(s, zwnj) == -1 {
   700  		return nil
   701  	}
   702  	st := stateStart
   703  	for i := 0; ; {
   704  		jt := x.joinType()
   705  		if s[i:i+sz] == zwj {
   706  			jt = joinZWJ
   707  		} else if s[i:i+sz] == zwnj {
   708  			jt = joinZWNJ
   709  		}
   710  		st = joinStates[st][jt]
   711  		if x.isViramaModifier() {
   712  			st = joinStates[st][joinVirama]
   713  		}
   714  		if i += sz; i == len(s) {
   715  			break
   716  		}
   717  		v, sz = trie.lookupString(s[i:])
   718  		x = info(v)
   719  	}
   720  	if st == stateFAIL || st == stateAfter {
   721  		return &labelError{s, "C"}
   722  	}
   723  	return nil
   724  }
   725  
   726  func ascii(s string) bool {
   727  	for i := 0; i < len(s); i++ {
   728  		if s[i] >= utf8.RuneSelf {
   729  			return false
   730  		}
   731  	}
   732  	return true
   733  }