gitee.com/ks-custle/core-gm@v0.0.0-20230922171213-b83bdd97b62c/net/idna/idna10.0.0.go (about)

     1  // Code generated by running "go generate" in golang.org/x/text. DO NOT EDIT.
     2  
     3  // Copyright 2016 The Go Authors. All rights reserved.
     4  // Use of this source code is governed by a BSD-style
     5  // license that can be found in the LICENSE file.
     6  
     7  //go:build go1.10
     8  // +build go1.10
     9  
    10  // Package idna implements IDNA2008 using the compatibility processing
    11  // defined by UTS (Unicode Technical Standard) #46, which defines a standard to
    12  // deal with the transition from IDNA2003.
    13  //
    14  // IDNA2008 (Internationalized Domain Names for Applications), is defined in RFC
    15  // 5890, RFC 5891, RFC 5892, RFC 5893 and RFC 5894.
    16  // UTS #46 is defined in https://www.unicode.org/reports/tr46.
    17  // See https://unicode.org/cldr/utility/idna.jsp for a visualization of the
    18  // differences between these two standards.
    19  package idna // import "gitee.com/ks-custle/core-gm/net/idna"
    20  
    21  import (
    22  	"fmt"
    23  	"strings"
    24  	"unicode/utf8"
    25  
    26  	"golang.org/x/text/secure/bidirule"
    27  	"golang.org/x/text/unicode/bidi"
    28  	"golang.org/x/text/unicode/norm"
    29  )
    30  
    31  // NOTE: Unlike common practice in Go APIs, the functions will return a
    32  // sanitized domain name in case of errors. Browsers sometimes use a partially
    33  // evaluated string as lookup.
    34  // TODO: the current error handling is, in my opinion, the least opinionated.
    35  // Other strategies are also viable, though:
    36  // Option 1) Return an empty string in case of error, but allow the user to
    37  //    specify explicitly which errors to ignore.
    38  // Option 2) Return the partially evaluated string if it is itself a valid
    39  //    string, otherwise return the empty string in case of error.
    40  // Option 3) Option 1 and 2.
    41  // Option 4) Always return an empty string for now and implement Option 1 as
    42  //    needed, and document that the return string may not be empty in case of
    43  //    error in the future.
    44  // I think Option 1 is best, but it is quite opinionated.
    45  
    46  // ToASCII is a wrapper for Punycode.ToASCII.
    47  func ToASCII(s string) (string, error) {
    48  	return Punycode.process(s, true)
    49  }
    50  
    51  // ToUnicode is a wrapper for Punycode.ToUnicode.
    52  func ToUnicode(s string) (string, error) {
    53  	return Punycode.process(s, false)
    54  }
    55  
// An Option configures a Profile at creation time. It is a function that
// mutates the options being assembled; New applies the Options it is given in
// argument order.
type Option func(*options)
    58  
    59  // Transitional sets a Profile to use the Transitional mapping as defined in UTS
    60  // #46. This will cause, for example, "ß" to be mapped to "ss". Using the
    61  // transitional mapping provides a compromise between IDNA2003 and IDNA2008
    62  // compatibility. It is used by some browsers when resolving domain names. This
    63  // option is only meaningful if combined with MapForLookup.
    64  func Transitional(transitional bool) Option {
    65  	return func(o *options) { o.transitional = transitional }
    66  }
    67  
    68  // VerifyDNSLength sets whether a Profile should fail if any of the IDN parts
    69  // are longer than allowed by the RFC.
    70  //
    71  // This option corresponds to the VerifyDnsLength flag in UTS #46.
    72  func VerifyDNSLength(verify bool) Option {
    73  	return func(o *options) { o.verifyDNSLength = verify }
    74  }
    75  
    76  // RemoveLeadingDots removes leading label separators. Leading runes that map to
    77  // dots, such as U+3002 IDEOGRAPHIC FULL STOP, are removed as well.
    78  func RemoveLeadingDots(remove bool) Option {
    79  	return func(o *options) { o.removeLeadingDots = remove }
    80  }
    81  
    82  // ValidateLabels sets whether to check the mandatory label validation criteria
    83  // as defined in Section 5.4 of RFC 5891. This includes testing for correct use
    84  // of hyphens ('-'), normalization, validity of runes, and the context rules.
    85  // In particular, ValidateLabels also sets the CheckHyphens and CheckJoiners flags
    86  // in UTS #46.
    87  func ValidateLabels(enable bool) Option {
    88  	return func(o *options) {
    89  		// Don't override existing mappings, but set one that at least checks
    90  		// normalization if it is not set.
    91  		if o.mapping == nil && enable {
    92  			o.mapping = normalize
    93  		}
    94  		o.trie = trie
    95  		o.checkJoiners = enable
    96  		o.checkHyphens = enable
    97  		if enable {
    98  			o.fromPuny = validateFromPunycode
    99  		} else {
   100  			o.fromPuny = nil
   101  		}
   102  	}
   103  }
   104  
   105  // CheckHyphens sets whether to check for correct use of hyphens ('-') in
   106  // labels. Most web browsers do not have this option set, since labels such as
   107  // "r3---sn-apo3qvuoxuxbt-j5pe" are in common use.
   108  //
   109  // This option corresponds to the CheckHyphens flag in UTS #46.
   110  func CheckHyphens(enable bool) Option {
   111  	return func(o *options) { o.checkHyphens = enable }
   112  }
   113  
   114  // CheckJoiners sets whether to check the ContextJ rules as defined in Appendix
   115  // A of RFC 5892, concerning the use of joiner runes.
   116  //
   117  // This option corresponds to the CheckJoiners flag in UTS #46.
   118  func CheckJoiners(enable bool) Option {
   119  	return func(o *options) {
   120  		o.trie = trie
   121  		o.checkJoiners = enable
   122  	}
   123  }
   124  
   125  // StrictDomainName limits the set of permissible ASCII characters to those
   126  // allowed in domain names as defined in RFC 1034 (A-Z, a-z, 0-9 and the
   127  // hyphen). This is set by default for MapForLookup and ValidateForRegistration,
   128  // but is only useful if ValidateLabels is set.
   129  //
   130  // This option is useful, for instance, for browsers that allow characters
   131  // outside this range, for example a '_' (U+005F LOW LINE). See
   132  // http://www.rfc-editor.org/std/std3.txt for more details.
   133  //
   134  // This option corresponds to the UseSTD3ASCIIRules flag in UTS #46.
   135  func StrictDomainName(use bool) Option {
   136  	return func(o *options) { o.useSTD3Rules = use }
   137  }
   138  
   139  // NOTE: the following options pull in tables. The tables should not be linked
   140  // in as long as the options are not used.
   141  
   142  // BidiRule enables the Bidi rule as defined in RFC 5893. Any application
   143  // that relies on proper validation of labels should include this rule.
   144  //
   145  // This option corresponds to the CheckBidi flag in UTS #46.
   146  func BidiRule() Option {
   147  	return func(o *options) { o.bidirule = bidirule.ValidString }
   148  }
   149  
   150  // ValidateForRegistration sets validation options to verify that a given IDN is
   151  // properly formatted for registration as defined by Section 4 of RFC 5891.
   152  func ValidateForRegistration() Option {
   153  	return func(o *options) {
   154  		o.mapping = validateRegistration
   155  		StrictDomainName(true)(o)
   156  		ValidateLabels(true)(o)
   157  		VerifyDNSLength(true)(o)
   158  		BidiRule()(o)
   159  	}
   160  }
   161  
   162  // MapForLookup sets validation and mapping options such that a given IDN is
   163  // transformed for domain name lookup according to the requirements set out in
   164  // Section 5 of RFC 5891. The mappings follow the recommendations of RFC 5894,
   165  // RFC 5895 and UTS 46. It does not add the Bidi Rule. Use the BidiRule option
   166  // to add this check.
   167  //
   168  // The mappings include normalization and mapping case, width and other
   169  // compatibility mappings.
   170  func MapForLookup() Option {
   171  	return func(o *options) {
   172  		o.mapping = validateAndMap
   173  		StrictDomainName(true)(o)
   174  		ValidateLabels(true)(o)
   175  	}
   176  }
   177  
// options holds the configuration of a Profile. The zero value performs no
// mapping and no validation.
type options struct {
	// transitional keeps deviation characters classified as deviation in
	// simplify; when false they are treated as valid.
	transitional bool
	// useSTD3Rules makes simplify treat the disallowedSTD3* categories as
	// disallowed instead of mapped/valid.
	useSTD3Rules bool
	// checkHyphens enables the hyphen position checks in validateLabel.
	checkHyphens bool
	// checkJoiners enables the leading-modifier and joiner checks in
	// validateLabel; these use trie.
	checkJoiners bool
	// verifyDNSLength enables the empty-label and length checks.
	verifyDNSLength bool
	// removeLeadingDots makes process strip leading '.' bytes after mapping.
	removeLeadingDots bool

	// trie is the lookup table used by validateLabel.
	trie *idnaTrie

	// fromPuny calls validation rules when converting A-labels to U-labels.
	fromPuny func(p *Profile, s string) error

	// mapping implements a validation and mapping step as defined in RFC 5895
	// or UTS 46, tailored to, for example, domain registration or lookup.
	mapping func(p *Profile, s string) (mapped string, isBidi bool, err error)

	// bidirule, if specified, checks whether s conforms to the Bidi Rule
	// defined in RFC 5893.
	bidirule func(s string) bool
}
   199  
// A Profile defines the configuration of an IDNA mapper. It embeds the
// unexported options struct, so its settings can only be chosen through the
// Option functions passed to New or by using one of the predefined profiles.
type Profile struct {
	options
}
   204  
   205  func apply(o *options, opts []Option) {
   206  	for _, f := range opts {
   207  		f(o)
   208  	}
   209  }
   210  
   211  // New creates a new Profile.
   212  //
   213  // With no options, the returned Profile is the most permissive and equals the
   214  // Punycode Profile. Options can be passed to further restrict the Profile. The
   215  // MapForLookup and ValidateForRegistration options set a collection of options,
   216  // for lookup and registration purposes respectively, which can be tailored by
   217  // adding more fine-grained options, where later options override earlier
   218  // options.
   219  func New(o ...Option) *Profile {
   220  	p := &Profile{}
   221  	apply(&p.options, o)
   222  	return p
   223  }
   224  
   225  // ToASCII converts a domain or domain label to its ASCII form. For example,
   226  // ToASCII("bücher.example.com") is "xn--bcher-kva.example.com", and
   227  // ToASCII("golang") is "golang". If an error is encountered it will return
   228  // an error and a (partially) processed result.
   229  func (p *Profile) ToASCII(s string) (string, error) {
   230  	return p.process(s, true)
   231  }
   232  
   233  // ToUnicode converts a domain or domain label to its Unicode form. For example,
   234  // ToUnicode("xn--bcher-kva.example.com") is "bücher.example.com", and
   235  // ToUnicode("golang") is "golang". If an error is encountered it will return
   236  // an error and a (partially) processed result.
   237  func (p *Profile) ToUnicode(s string) (string, error) {
   238  	pp := *p
   239  	pp.transitional = false
   240  	return pp.process(s, false)
   241  }
   242  
   243  // String reports a string with a description of the profile for debugging
   244  // purposes. The string format may change with different versions.
   245  func (p *Profile) String() string {
   246  	s := ""
   247  	if p.transitional {
   248  		s = "Transitional"
   249  	} else {
   250  		s = "NonTransitional"
   251  	}
   252  	if p.useSTD3Rules {
   253  		s += ":UseSTD3Rules"
   254  	}
   255  	if p.checkHyphens {
   256  		s += ":CheckHyphens"
   257  	}
   258  	if p.checkJoiners {
   259  		s += ":CheckJoiners"
   260  	}
   261  	if p.verifyDNSLength {
   262  		s += ":VerifyDNSLength"
   263  	}
   264  	return s
   265  }
   266  
// Predefined profiles. Each exported variable points at the unexported
// profile declared further down in this block.
var (
	// Punycode is a Profile that does raw punycode processing with a minimum
	// of validation.
	Punycode *Profile = punycode

	// Lookup is the recommended profile for looking up domain names, according
	// to Section 5 of RFC 5891. The exact configuration of this profile may
	// change over time.
	Lookup *Profile = lookup

	// Display is the recommended profile for displaying domain names.
	// The configuration of this profile may change over time.
	Display *Profile = display

	// Registration is the recommended profile for checking whether a given
	// IDN is valid for registration, according to Section 4 of RFC 5891.
	Registration *Profile = registration

	// punycode has every option at its zero value: no mapping, no validation.
	punycode = &Profile{}
	// lookup maps and validates; whether it is transitional is decided by
	// transitionalLookup, which is declared outside this file.
	lookup = &Profile{options{
		transitional: transitionalLookup,
		useSTD3Rules: true,
		checkHyphens: true,
		checkJoiners: true,
		trie:         trie,
		fromPuny:     validateFromPunycode,
		mapping:      validateAndMap,
		bidirule:     bidirule.ValidString,
	}}
	// display uses the same settings as lookup except that transitional is
	// left false.
	display = &Profile{options{
		useSTD3Rules: true,
		checkHyphens: true,
		checkJoiners: true,
		trie:         trie,
		fromPuny:     validateFromPunycode,
		mapping:      validateAndMap,
		bidirule:     bidirule.ValidString,
	}}
	// registration rejects rather than maps (validateRegistration) and also
	// verifies DNS length limits.
	registration = &Profile{options{
		useSTD3Rules:    true,
		verifyDNSLength: true,
		checkHyphens:    true,
		checkJoiners:    true,
		trie:            trie,
		fromPuny:        validateFromPunycode,
		mapping:         validateRegistration,
		bidirule:        bidirule.ValidString,
	}}

	// TODO: profiles
	// Register: recommended for approving domain names: don't do any mappings
	// but rather reject on invalid input. Bundle or block deviation characters.
)
   320  
   321  type labelError struct{ label, code_ string }
   322  
   323  func (e labelError) code() string { return e.code_ }
   324  func (e labelError) Error() string {
   325  	return fmt.Sprintf("idna: invalid label %q", e.label)
   326  }
   327  
   328  type runeError rune
   329  
   330  func (e runeError) code() string { return "P1" }
   331  func (e runeError) Error() string {
   332  	return fmt.Sprintf("idna: disallowed rune %U", e)
   333  }
   334  
// process implements the algorithm described in section 4 of UTS #46,
// see https://www.unicode.org/reports/tr46.
//
// It runs the profile's mapping (if any) over s, decodes and validates every
// label, optionally applies the Bidi rule and, when toASCII is true, encodes
// non-ASCII labels with the ACE prefix. Only the first error encountered is
// reported; processing continues after an error so that the returned string
// is still a (partially) processed result.
func (p *Profile) process(s string, toASCII bool) (string, error) {
	var err error
	var isBidi bool
	if p.mapping != nil {
		s, isBidi, err = p.mapping(p, s)
	}
	// Remove leading empty labels.
	if p.removeLeadingDots {
		for ; len(s) > 0 && s[0] == '.'; s = s[1:] {
		}
	}
	// TODO: allow for a quick check of the tables data.
	// It seems like we should only create this error on ToASCII, but the
	// UTS 46 conformance tests suggests we should always check this.
	if err == nil && p.verifyDNSLength && s == "" {
		err = &labelError{s, "A4"}
	}
	// First pass: decode A-labels and validate every label.
	labels := labelIter{orig: s}
	for ; !labels.done(); labels.next() {
		label := labels.label()
		if label == "" {
			// Empty labels are not okay. The label iterator skips the last
			// label if it is empty.
			if err == nil && p.verifyDNSLength {
				err = &labelError{s, "A4"}
			}
			continue
		}
		if strings.HasPrefix(label, acePrefix) {
			u, err2 := decode(label[len(acePrefix):])
			if err2 != nil {
				if err == nil {
					err = err2
				}
				// Spec says keep the old label.
				continue
			}
			// A decoded label can make the whole name subject to the Bidi
			// rule even if the mapping step did not flag it.
			isBidi = isBidi || bidirule.DirectionString(u) != bidi.LeftToRight
			labels.set(u)
			if err == nil && p.fromPuny != nil {
				err = p.fromPuny(p, u)
			}
			if err == nil {
				// This should be called on NonTransitional, according to the
				// spec, but that currently does not have any effect. Use the
				// original profile to preserve options.
				err = p.validateLabel(u)
			}
		} else if err == nil {
			err = p.validateLabel(label)
		}
	}
	// Second pass: apply the Bidi rule to every label, but only when the name
	// was flagged as bidi, a rule is configured and nothing has failed yet.
	if isBidi && p.bidirule != nil && err == nil {
		for labels.reset(); !labels.done(); labels.next() {
			if !p.bidirule(labels.label()) {
				err = &labelError{s, "B"}
				break
			}
		}
	}
	// Third pass (ToASCII only): encode non-ASCII labels and check the
	// per-label length limit.
	if toASCII {
		for labels.reset(); !labels.done(); labels.next() {
			label := labels.label()
			if !ascii(label) {
				a, err2 := encode(acePrefix, label)
				if err == nil {
					err = err2
				}
				// The encoder's output replaces the label even if encoding
				// reported an error.
				label = a
				labels.set(a)
			}
			n := len(label)
			if p.verifyDNSLength && err == nil && (n == 0 || n > 63) {
				err = &labelError{label, "A4"}
			}
		}
	}
	s = labels.result()
	if toASCII && p.verifyDNSLength && err == nil {
		// Compute the length of the domain name minus the root label and its dot.
		n := len(s)
		if n > 0 && s[n-1] == '.' {
			n--
		}
		if len(s) < 1 || n > 253 {
			err = &labelError{s, "A4"}
		}
	}
	return s, err
}
   427  
   428  func normalize(p *Profile, s string) (mapped string, isBidi bool, err error) {
   429  	// TODO: consider first doing a quick check to see if any of these checks
   430  	// need to be done. This will make it slower in the general case, but
   431  	// faster in the common case.
   432  	mapped = norm.NFC.String(s)
   433  	isBidi = bidirule.DirectionString(mapped) == bidi.RightToLeft
   434  	return mapped, isBidi, nil
   435  }
   436  
   437  func validateRegistration(p *Profile, s string) (idem string, bidi bool, err error) {
   438  	// TODO: filter need for normalization in loop below.
   439  	if !norm.NFC.IsNormalString(s) {
   440  		return s, false, &labelError{s, "V1"}
   441  	}
   442  	for i := 0; i < len(s); {
   443  		v, sz := trie.lookupString(s[i:])
   444  		if sz == 0 {
   445  			return s, bidi, runeError(utf8.RuneError)
   446  		}
   447  		bidi = bidi || info(v).isBidi(s[i:])
   448  		// Copy bytes not copied so far.
   449  		switch p.simplify(info(v).category()) {
   450  		// TODO: handle the NV8 defined in the Unicode idna data set to allow
   451  		// for strict conformance to IDNA2008.
   452  		case valid, deviation:
   453  		case disallowed, mapped, unknown, ignored:
   454  			r, _ := utf8.DecodeRuneInString(s[i:])
   455  			return s, bidi, runeError(r)
   456  		}
   457  		i += sz
   458  	}
   459  	return s, bidi, nil
   460  }
   461  
   462  func (c info) isBidi(s string) bool {
   463  	if !c.isMapped() {
   464  		return c&attributesMask == rtl
   465  	}
   466  	// TODO: also store bidi info for mapped data. This is possible, but a bit
   467  	// cumbersome and not for the common case.
   468  	p, _ := bidi.LookupString(s)
   469  	switch p.Class() {
   470  	case bidi.R, bidi.AL, bidi.AN:
   471  		return true
   472  	}
   473  	return false
   474  }
   475  
// validateAndMap is the mapping step used for lookup. It walks s rune by rune
// and acts on the simplified category of each rune: valid runes are kept,
// mapped and deviation runes are replaced by their mapping, ignored runes are
// dropped, unknown runes are replaced by U+FFFD, and disallowed runes are
// kept in the output but recorded as an error. The result is put into NFC
// when needed. Only the first error is returned, together with the mapped
// string. bidi reports whether any rune was found to be bidi by info.isBidi.
func validateAndMap(p *Profile, s string) (vm string, bidi bool, err error) {
	var (
		// b accumulates the output once the first change has been made.
		b []byte
		// k is the offset in s up to which input has been accounted for in b.
		k int
	)
	// combinedInfoBits contains the or-ed bits of all runes. We use this
	// to derive the mayNeedNorm bit later. This may trigger normalization
	// overeagerly, but it will not do so in the common case. The end result
	// is another 10% saving on BenchmarkProfile for the common case.
	var combinedInfoBits info
	for i := 0; i < len(s); {
		v, sz := trie.lookupString(s[i:])
		if sz == 0 {
			// The lookup could not consume any bytes. Emit U+FFFD, drop the
			// rest of the input (k = len(s)) and stop.
			b = append(b, s[k:i]...)
			b = append(b, "\ufffd"...)
			k = len(s)
			if err == nil {
				err = runeError(utf8.RuneError)
			}
			break
		}
		combinedInfoBits |= info(v)
		bidi = bidi || info(v).isBidi(s[i:])
		start := i
		i += sz
		// Copy bytes not copied so far.
		switch p.simplify(info(v).category()) {
		case valid:
			continue
		case disallowed:
			// Record the first offending rune but leave it in the output.
			if err == nil {
				r, _ := utf8.DecodeRuneInString(s[start:])
				err = runeError(r)
			}
			continue
		case mapped, deviation:
			b = append(b, s[k:start]...)
			b = info(v).appendMapping(b, s[start:i])
		case ignored:
			b = append(b, s[k:start]...)
			// drop the rune
		case unknown:
			b = append(b, s[k:start]...)
			b = append(b, "\ufffd"...)
		}
		// Reached only by the cases that wrote to b: everything before i is
		// now reflected in b.
		k = i
	}
	if k == 0 {
		// No changes so far.
		if combinedInfoBits&mayNeedNorm != 0 {
			s = norm.NFC.String(s)
		}
	} else {
		// Flush the unchanged tail and normalize only if b is not already
		// entirely in NFC.
		b = append(b, s[k:]...)
		if norm.NFC.QuickSpan(b) != len(b) {
			b = norm.NFC.Bytes(b)
		}
		// TODO: the punycode converters require strings as input.
		s = string(b)
	}
	return s, bidi, err
}
   538  
   539  // A labelIter allows iterating over domain name labels.
   540  type labelIter struct {
   541  	orig     string
   542  	slice    []string
   543  	curStart int
   544  	curEnd   int
   545  	i        int
   546  }
   547  
   548  func (l *labelIter) reset() {
   549  	l.curStart = 0
   550  	l.curEnd = 0
   551  	l.i = 0
   552  }
   553  
   554  func (l *labelIter) done() bool {
   555  	return l.curStart >= len(l.orig)
   556  }
   557  
   558  func (l *labelIter) result() string {
   559  	if l.slice != nil {
   560  		return strings.Join(l.slice, ".")
   561  	}
   562  	return l.orig
   563  }
   564  
   565  func (l *labelIter) label() string {
   566  	if l.slice != nil {
   567  		return l.slice[l.i]
   568  	}
   569  	p := strings.IndexByte(l.orig[l.curStart:], '.')
   570  	l.curEnd = l.curStart + p
   571  	if p == -1 {
   572  		l.curEnd = len(l.orig)
   573  	}
   574  	return l.orig[l.curStart:l.curEnd]
   575  }
   576  
   577  // next sets the value to the next label. It skips the last label if it is empty.
   578  func (l *labelIter) next() {
   579  	l.i++
   580  	if l.slice != nil {
   581  		if l.i >= len(l.slice) || l.i == len(l.slice)-1 && l.slice[l.i] == "" {
   582  			l.curStart = len(l.orig)
   583  		}
   584  	} else {
   585  		l.curStart = l.curEnd + 1
   586  		if l.curStart == len(l.orig)-1 && l.orig[l.curStart] == '.' {
   587  			l.curStart = len(l.orig)
   588  		}
   589  	}
   590  }
   591  
   592  func (l *labelIter) set(s string) {
   593  	if l.slice == nil {
   594  		l.slice = strings.Split(l.orig, ".")
   595  	}
   596  	l.slice[l.i] = s
   597  }
   598  
// acePrefix is the ASCII Compatible Encoding prefix. process treats a label
// starting with it as Punycode to be decoded, and passes it to encode when
// converting a non-ASCII label for ToASCII.
const acePrefix = "xn--"
   601  
   602  func (p *Profile) simplify(cat category) category {
   603  	switch cat {
   604  	case disallowedSTD3Mapped:
   605  		if p.useSTD3Rules {
   606  			cat = disallowed
   607  		} else {
   608  			cat = mapped
   609  		}
   610  	case disallowedSTD3Valid:
   611  		if p.useSTD3Rules {
   612  			cat = disallowed
   613  		} else {
   614  			cat = valid
   615  		}
   616  	case deviation:
   617  		if !p.transitional {
   618  			cat = valid
   619  		}
   620  	case validNV8, validXV8:
   621  		// TODO: handle V2008
   622  		cat = valid
   623  	}
   624  	return cat
   625  }
   626  
   627  func validateFromPunycode(p *Profile, s string) error {
   628  	if !norm.NFC.IsNormalString(s) {
   629  		return &labelError{s, "V1"}
   630  	}
   631  	// TODO: detect whether string may have to be normalized in the following
   632  	// loop.
   633  	for i := 0; i < len(s); {
   634  		v, sz := trie.lookupString(s[i:])
   635  		if sz == 0 {
   636  			return runeError(utf8.RuneError)
   637  		}
   638  		if c := p.simplify(info(v).category()); c != valid && c != deviation {
   639  			return &labelError{s, "V6"}
   640  		}
   641  		i += sz
   642  	}
   643  	return nil
   644  }
   645  
// UTF-8 strings of the two joiner code points that validateLabel searches for
// and compares against.
const (
	zwnj = "\u200c" // U+200C ZERO WIDTH NON-JOINER
	zwj  = "\u200d" // U+200D ZERO WIDTH JOINER
)
   650  
// joinState is a state of the state machine that validateLabel runs over a
// label containing joiners; transitions are defined by joinStates.
type joinState int8

// States of the joiner state machine. stateStart is the initial state and
// also the zero value. A label that ends in stateFAIL or stateAfter is
// rejected by validateLabel.
const (
	stateStart joinState = iota
	stateVirama
	stateBefore
	stateBeforeVirama
	stateAfter
	stateFAIL
)
   661  
// joinStates is the transition table of the joiner state machine:
// joinStates[state][joinType] is the state entered when a rune of the given
// join type is seen in the given state. Entries that are not listed take the
// zero value, which is stateStart. stateFAIL maps every listed input back to
// stateFAIL.
var joinStates = [][numJoinTypes]joinState{
	stateStart: {
		joiningL:   stateBefore,
		joiningD:   stateBefore,
		joinZWNJ:   stateFAIL,
		joinZWJ:    stateFAIL,
		joinVirama: stateVirama,
	},
	stateVirama: {
		joiningL: stateBefore,
		joiningD: stateBefore,
	},
	stateBefore: {
		joiningL:   stateBefore,
		joiningD:   stateBefore,
		joiningT:   stateBefore,
		joinZWNJ:   stateAfter,
		joinZWJ:    stateFAIL,
		joinVirama: stateBeforeVirama,
	},
	stateBeforeVirama: {
		joiningL: stateBefore,
		joiningD: stateBefore,
		joiningT: stateBefore,
	},
	stateAfter: {
		joiningL:   stateFAIL,
		joiningD:   stateBefore,
		joiningT:   stateAfter,
		joiningR:   stateStart,
		joinZWNJ:   stateFAIL,
		joinZWJ:    stateFAIL,
		joinVirama: stateAfter, // no-op as we can't accept joiners here
	},
	stateFAIL: {
		0:          stateFAIL,
		joiningL:   stateFAIL,
		joiningD:   stateFAIL,
		joiningT:   stateFAIL,
		joiningR:   stateFAIL,
		joinZWNJ:   stateFAIL,
		joinZWJ:    stateFAIL,
		joinVirama: stateFAIL,
	},
}
   707  
   708  // validateLabel validates the criteria from Section 4.1. Item 1, 4, and 6 are
   709  // already implicitly satisfied by the overall implementation.
   710  func (p *Profile) validateLabel(s string) (err error) {
   711  	if s == "" {
   712  		if p.verifyDNSLength {
   713  			return &labelError{s, "A4"}
   714  		}
   715  		return nil
   716  	}
   717  	if p.checkHyphens {
   718  		if len(s) > 4 && s[2] == '-' && s[3] == '-' {
   719  			return &labelError{s, "V2"}
   720  		}
   721  		if s[0] == '-' || s[len(s)-1] == '-' {
   722  			return &labelError{s, "V3"}
   723  		}
   724  	}
   725  	if !p.checkJoiners {
   726  		return nil
   727  	}
   728  	trie := p.trie // p.checkJoiners is only set if trie is set.
   729  	// TODO: merge the use of this in the trie.
   730  	v, sz := trie.lookupString(s)
   731  	x := info(v)
   732  	if x.isModifier() {
   733  		return &labelError{s, "V5"}
   734  	}
   735  	// Quickly return in the absence of zero-width (non) joiners.
   736  	if strings.Index(s, zwj) == -1 && strings.Index(s, zwnj) == -1 {
   737  		return nil
   738  	}
   739  	st := stateStart
   740  	for i := 0; ; {
   741  		jt := x.joinType()
   742  		if s[i:i+sz] == zwj {
   743  			jt = joinZWJ
   744  		} else if s[i:i+sz] == zwnj {
   745  			jt = joinZWNJ
   746  		}
   747  		st = joinStates[st][jt]
   748  		if x.isViramaModifier() {
   749  			st = joinStates[st][joinVirama]
   750  		}
   751  		if i += sz; i == len(s) {
   752  			break
   753  		}
   754  		v, sz = trie.lookupString(s[i:])
   755  		x = info(v)
   756  	}
   757  	if st == stateFAIL || st == stateAfter {
   758  		return &labelError{s, "C"}
   759  	}
   760  	return nil
   761  }
   762  
   763  func ascii(s string) bool {
   764  	for i := 0; i < len(s); i++ {
   765  		if s[i] >= utf8.RuneSelf {
   766  			return false
   767  		}
   768  	}
   769  	return true
   770  }