golang.org/toolchain@v0.0.1-go1.9rc2.windows-amd64/src/vendor/golang_org/x/text/secure/bidirule/bidirule.go (about)

     1  // Code generated by running "go run gen.go -core" in golang.org/x/text. DO NOT EDIT.
     2  
     3  // Copyright 2016 The Go Authors. All rights reserved.
     4  // Use of this source code is governed by a BSD-style
     5  // license that can be found in the LICENSE file.
     6  
     7  // Package bidirule implements the Bidi Rule defined by RFC 5893.
     8  //
     9  // This package is under development. The API may change without notice and
    10  // without preserving backward compatibility.
    11  package bidirule
    12  
    13  import (
    14  	"errors"
    15  	"unicode/utf8"
    16  
    17  	"golang_org/x/text/transform"
    18  	"golang_org/x/text/unicode/bidi"
    19  )
    20  
    21  // This file contains an implementation of RFC 5893: Right-to-Left Scripts for
    22  // Internationalized Domain Names for Applications (IDNA)
    23  //
    24  // A label is an individual component of a domain name.  Labels are usually
    25  // shown separated by dots; for example, the domain name "www.example.com" is
    26  // composed of three labels: "www", "example", and "com".
    27  //
    28  // An RTL label is a label that contains at least one character of class R, AL,
    29  // or AN. An LTR label is any label that is not an RTL label.
    30  //
    31  // A "Bidi domain name" is a domain name that contains at least one RTL label.
    32  //
    33  //  The following guarantees can be made based on the above:
    34  //
    35  //  o  In a domain name consisting of only labels that satisfy the rule,
    36  //     the requirements of Section 3 are satisfied.  Note that even LTR
    37  //     labels and pure ASCII labels have to be tested.
    38  //
    39  //  o  In a domain name consisting of only LDH labels (as defined in the
    40  //     Definitions document [RFC5890]) and labels that satisfy the rule,
    41  //     the requirements of Section 3 are satisfied as long as a label
    42  //     that starts with an ASCII digit does not come after a
    43  //     right-to-left label.
    44  //
    45  //  No guarantee is given for other combinations.
    46  
// ErrInvalid indicates a label is invalid according to the Bidi Rule.
//
// It is the sentinel error returned by Span (and passed through by Transform)
// when the input violates the rule, contains invalid UTF-8, or ends in a
// state that is not allowed at the end of a label.
var ErrInvalid = errors.New("bidirule: failed Bidi Rule")
    49  
// ruleState identifies the current state of the state machine that advance
// and advanceString run over a label. It is also used as the index into the
// transitions table.
type ruleState uint8

const (
	// ruleInitial is the state before any character has been processed. It is
	// the zero value, so a zero Transformer starts here.
	ruleInitial ruleState = iota
	// ruleLTR is an LTR label whose most recent character is allowed inside
	// the label but is not allowed to end it.
	ruleLTR
	// ruleLTRFinal is an LTR label that could validly end at this point.
	ruleLTRFinal
	// ruleRTL is an RTL label whose most recent character is allowed inside
	// the label but is not allowed to end it.
	ruleRTL
	// ruleRTLFinal is an RTL label that could validly end at this point.
	ruleRTLFinal
	// ruleInvalid is entered when no transition matches the current character
	// or when both EN and AN have been seen. It has no outgoing transitions.
	ruleInvalid
)
    60  
// ruleTransition is one edge of the state machine: if the bit for the current
// character's Bidi class is set in mask, the machine moves to next.
type ruleTransition struct {
	next ruleState // state to enter when the class matches mask
	mask uint16    // bit set of Bidi classes, with bit 1<<class per class
}
    65  
// transitions is the state machine for the Bidi Rule, indexed by ruleState.
// Each state has exactly two candidate transitions, which advance and
// advanceString test in order: the first whose mask contains the bit of the
// current character's Bidi class is taken. If neither matches, the caller
// moves the machine to ruleInvalid.
//
// The bracketed numbers in the comments below refer to the conditions of the
// Bidi Rule in RFC 5893, Section 2.
var transitions = [...][2]ruleTransition{
	// [2.1] The first character must be a character with Bidi property L, R, or
	// AL. If it has the R or AL property, it is an RTL label; if it has the L
	// property, it is an LTR label.
	ruleInitial: {
		{ruleLTRFinal, 1 << bidi.L},
		{ruleRTLFinal, 1<<bidi.R | 1<<bidi.AL},
	},
	ruleRTL: {
		// [2.3] In an RTL label, the end of the label must be a character with
		// Bidi property R, AL, EN, or AN, followed by zero or more characters
		// with Bidi property NSM.
		{ruleRTLFinal, 1<<bidi.R | 1<<bidi.AL | 1<<bidi.EN | 1<<bidi.AN},

		// [2.2] In an RTL label, only characters with the Bidi properties R,
		// AL, AN, EN, ES, CS, ET, ON, BN, or NSM are allowed.
		// We exclude the entries from [2.3]
		{ruleRTL, 1<<bidi.ES | 1<<bidi.CS | 1<<bidi.ET | 1<<bidi.ON | 1<<bidi.BN | 1<<bidi.NSM},
	},
	ruleRTLFinal: {
		// [2.3] In an RTL label, the end of the label must be a character with
		// Bidi property R, AL, EN, or AN, followed by zero or more characters
		// with Bidi property NSM.
		{ruleRTLFinal, 1<<bidi.R | 1<<bidi.AL | 1<<bidi.EN | 1<<bidi.AN | 1<<bidi.NSM},

		// [2.2] In an RTL label, only characters with the Bidi properties R,
		// AL, AN, EN, ES, CS, ET, ON, BN, or NSM are allowed.
		// We exclude the entries from [2.3] and NSM.
		{ruleRTL, 1<<bidi.ES | 1<<bidi.CS | 1<<bidi.ET | 1<<bidi.ON | 1<<bidi.BN},
	},
	ruleLTR: {
		// [2.6] In an LTR label, the end of the label must be a character with
		// Bidi property L or EN, followed by zero or more characters with Bidi
		// property NSM.
		{ruleLTRFinal, 1<<bidi.L | 1<<bidi.EN},

		// [2.5] In an LTR label, only characters with the Bidi properties L,
		// EN, ES, CS, ET, ON, BN, or NSM are allowed.
		// We exclude the entries from [2.6].
		{ruleLTR, 1<<bidi.ES | 1<<bidi.CS | 1<<bidi.ET | 1<<bidi.ON | 1<<bidi.BN | 1<<bidi.NSM},
	},
	ruleLTRFinal: {
		// [2.6] In an LTR label, the end of the label must be a character with
		// Bidi property L or EN, followed by zero or more characters with Bidi
		// property NSM.
		{ruleLTRFinal, 1<<bidi.L | 1<<bidi.EN | 1<<bidi.NSM},

		// [2.5] In an LTR label, only characters with the Bidi properties L,
		// EN, ES, CS, ET, ON, BN, or NSM are allowed.
		// We exclude the entries from [2.6].
		{ruleLTR, 1<<bidi.ES | 1<<bidi.CS | 1<<bidi.ET | 1<<bidi.ON | 1<<bidi.BN},
	},
	// Both masks are empty, so no class ever matches and the machine stays in
	// ruleInvalid once it gets there.
	ruleInvalid: {
		{ruleInvalid, 0},
		{ruleInvalid, 0},
	},
}
   123  
// [2.4] In an RTL label, if an EN is present, no AN may be present, and
// vice versa.
//
// exclusiveRTL is the bit set holding both classes. advance and advanceString
// compare the accumulated set of seen classes against it and reject the input
// as soon as both bits are set.
const exclusiveRTL = uint16(1<<bidi.EN | 1<<bidi.AN)
   127  
   128  // From RFC 5893
   129  // An RTL label is a label that contains at least one character of type
   130  // R, AL, or AN.
   131  //
   132  // An LTR label is any label that is not an RTL label.
   133  
   134  // Direction reports the direction of the given label as defined by RFC 5893.
   135  // The Bidi Rule does not have to be applied to labels of the category
   136  // LeftToRight.
   137  func Direction(b []byte) bidi.Direction {
   138  	for i := 0; i < len(b); {
   139  		e, sz := bidi.Lookup(b[i:])
   140  		if sz == 0 {
   141  			i++
   142  		}
   143  		c := e.Class()
   144  		if c == bidi.R || c == bidi.AL || c == bidi.AN {
   145  			return bidi.RightToLeft
   146  		}
   147  		i += sz
   148  	}
   149  	return bidi.LeftToRight
   150  }
   151  
   152  // DirectionString reports the direction of the given label as defined by RFC
   153  // 5893. The Bidi Rule does not have to be applied to labels of the category
   154  // LeftToRight.
   155  func DirectionString(s string) bidi.Direction {
   156  	for i := 0; i < len(s); {
   157  		e, sz := bidi.LookupString(s[i:])
   158  		if sz == 0 {
   159  			i++
   160  		}
   161  		c := e.Class()
   162  		if c == bidi.R || c == bidi.AL || c == bidi.AN {
   163  			return bidi.RightToLeft
   164  		}
   165  		i += sz
   166  	}
   167  	return bidi.LeftToRight
   168  }
   169  
   170  // Valid reports whether b conforms to the BiDi rule.
   171  func Valid(b []byte) bool {
   172  	var t Transformer
   173  	if n, ok := t.advance(b); !ok || n < len(b) {
   174  		return false
   175  	}
   176  	return t.isFinal()
   177  }
   178  
   179  // ValidString reports whether s conforms to the BiDi rule.
   180  func ValidString(s string) bool {
   181  	var t Transformer
   182  	if n, ok := t.advanceString(s); !ok || n < len(s) {
   183  		return false
   184  	}
   185  	return t.isFinal()
   186  }
   187  
   188  // New returns a Transformer that verifies that input adheres to the Bidi Rule.
   189  func New() *Transformer {
   190  	return &Transformer{}
   191  }
   192  
// Transformer implements transform.Transformer. It copies its input unchanged
// while checking it against the Bidi Rule. The zero value is ready to use.
type Transformer struct {
	// state is the current position in the transitions state machine.
	state ruleState
	// hasRTL is not referenced by any code visible in this file.
	// NOTE(review): appears to be unused — confirm before removing.
	hasRTL bool
	// seen is the bit set (1<<class) of all Bidi classes encountered so far.
	seen uint16
}
   199  
   200  // A rule can only be violated for "Bidi Domain names", meaning if one of the
   201  // following categories has been observed.
   202  func (t *Transformer) isRTL() bool {
   203  	const isRTL = 1<<bidi.R | 1<<bidi.AL | 1<<bidi.AN
   204  	return t.seen&isRTL != 0
   205  }
   206  
   207  func (t *Transformer) isFinal() bool {
   208  	if !t.isRTL() {
   209  		return true
   210  	}
   211  	return t.state == ruleLTRFinal || t.state == ruleRTLFinal || t.state == ruleInitial
   212  }
   213  
   214  // Reset implements transform.Transformer.
   215  func (t *Transformer) Reset() { *t = Transformer{} }
   216  
   217  // Transform implements transform.Transformer. This Transformer has state and
   218  // needs to be reset between uses.
   219  func (t *Transformer) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
   220  	if len(dst) < len(src) {
   221  		src = src[:len(dst)]
   222  		atEOF = false
   223  		err = transform.ErrShortDst
   224  	}
   225  	n, err1 := t.Span(src, atEOF)
   226  	copy(dst, src[:n])
   227  	if err == nil || err1 != nil && err1 != transform.ErrShortSrc {
   228  		err = err1
   229  	}
   230  	return n, n, err
   231  }
   232  
   233  // Span returns the first n bytes of src that conform to the Bidi rule.
   234  func (t *Transformer) Span(src []byte, atEOF bool) (n int, err error) {
   235  	if t.state == ruleInvalid && t.isRTL() {
   236  		return 0, ErrInvalid
   237  	}
   238  	n, ok := t.advance(src)
   239  	switch {
   240  	case !ok:
   241  		err = ErrInvalid
   242  	case n < len(src):
   243  		if !atEOF {
   244  			err = transform.ErrShortSrc
   245  			break
   246  		}
   247  		err = ErrInvalid
   248  	case !t.isFinal():
   249  		err = ErrInvalid
   250  	}
   251  	return n, err
   252  }
   253  
   254  // Precomputing the ASCII values decreases running time for the ASCII fast path
   255  // by about 30%.
   256  var asciiTable [128]bidi.Properties
   257  
   258  func init() {
   259  	for i := range asciiTable {
   260  		p, _ := bidi.LookupRune(rune(i))
   261  		asciiTable[i] = p
   262  	}
   263  }
   264  
   265  func (t *Transformer) advance(s []byte) (n int, ok bool) {
   266  	var e bidi.Properties
   267  	var sz int
   268  	for n < len(s) {
   269  		if s[n] < utf8.RuneSelf {
   270  			e, sz = asciiTable[s[n]], 1
   271  		} else {
   272  			e, sz = bidi.Lookup(s[n:])
   273  			if sz <= 1 {
   274  				if sz == 1 {
   275  					// We always consider invalid UTF-8 to be invalid, even if
   276  					// the string has not yet been determined to be RTL.
   277  					// TODO: is this correct?
   278  					return n, false
   279  				}
   280  				return n, true // incomplete UTF-8 encoding
   281  			}
   282  		}
   283  		// TODO: using CompactClass would result in noticeable speedup.
   284  		// See unicode/bidi/prop.go:Properties.CompactClass.
   285  		c := uint16(1 << e.Class())
   286  		t.seen |= c
   287  		if t.seen&exclusiveRTL == exclusiveRTL {
   288  			t.state = ruleInvalid
   289  			return n, false
   290  		}
   291  		switch tr := transitions[t.state]; {
   292  		case tr[0].mask&c != 0:
   293  			t.state = tr[0].next
   294  		case tr[1].mask&c != 0:
   295  			t.state = tr[1].next
   296  		default:
   297  			t.state = ruleInvalid
   298  			if t.isRTL() {
   299  				return n, false
   300  			}
   301  		}
   302  		n += sz
   303  	}
   304  	return n, true
   305  }
   306  
   307  func (t *Transformer) advanceString(s string) (n int, ok bool) {
   308  	var e bidi.Properties
   309  	var sz int
   310  	for n < len(s) {
   311  		if s[n] < utf8.RuneSelf {
   312  			e, sz = asciiTable[s[n]], 1
   313  		} else {
   314  			e, sz = bidi.LookupString(s[n:])
   315  			if sz <= 1 {
   316  				if sz == 1 {
   317  					return n, false // invalid UTF-8
   318  				}
   319  				return n, true // incomplete UTF-8 encoding
   320  			}
   321  		}
   322  		// TODO: using CompactClass results in noticeable speedup.
   323  		// See unicode/bidi/prop.go:Properties.CompactClass.
   324  		c := uint16(1 << e.Class())
   325  		t.seen |= c
   326  		if t.seen&exclusiveRTL == exclusiveRTL {
   327  			t.state = ruleInvalid
   328  			return n, false
   329  		}
   330  		switch tr := transitions[t.state]; {
   331  		case tr[0].mask&c != 0:
   332  			t.state = tr[0].next
   333  		case tr[1].mask&c != 0:
   334  			t.state = tr[1].next
   335  		default:
   336  			t.state = ruleInvalid
   337  			if t.isRTL() {
   338  				return n, false
   339  			}
   340  		}
   341  		n += sz
   342  	}
   343  	return n, true
   344  }