github.com/icodeface/tls@v0.0.0-20230910023335-34df9250cd12/internal/x/text/secure/bidirule/bidirule.go (about)

     1  // Code generated by running "go generate" in golang.org/x/text. DO NOT EDIT.
     2  
     3  // Copyright 2016 The Go Authors. All rights reserved.
     4  // Use of this source code is governed by a BSD-style
     5  // license that can be found in the LICENSE file.
     6  
     7  // Package bidirule implements the Bidi Rule defined by RFC 5893.
     8  //
     9  // This package is under development. The API may change without notice and
    10  // without preserving backward compatibility.
    11  package bidirule
    12  
    13  import (
    14  	"errors"
    15  	"unicode/utf8"
    16  
    17  	"github.com/icodeface/tls/internal/x/text/transform"
    18  	"github.com/icodeface/tls/internal/x/text/unicode/bidi"
    19  )
    20  
    21  // This file contains an implementation of RFC 5893: Right-to-Left Scripts for
    22  // Internationalized Domain Names for Applications (IDNA)
    23  //
    24  // A label is an individual component of a domain name.  Labels are usually
    25  // shown separated by dots; for example, the domain name "www.example.com" is
    26  // composed of three labels: "www", "example", and "com".
    27  //
    28  // An RTL label is a label that contains at least one character of class R, AL,
    29  // or AN. An LTR label is any label that is not an RTL label.
    30  //
    31  // A "Bidi domain name" is a domain name that contains at least one RTL label.
    32  //
    33  //  The following guarantees can be made based on the above:
    34  //
    35  //  o  In a domain name consisting of only labels that satisfy the rule,
    36  //     the requirements of Section 3 are satisfied.  Note that even LTR
    37  //     labels and pure ASCII labels have to be tested.
    38  //
    39  //  o  In a domain name consisting of only LDH labels (as defined in the
    40  //     Definitions document [RFC5890]) and labels that satisfy the rule,
    41  //     the requirements of Section 3 are satisfied as long as a label
    42  //     that starts with an ASCII digit does not come after a
    43  //     right-to-left label.
    44  //
    45  //  No guarantee is given for other combinations.
    46  
// ErrInvalid indicates a label is invalid according to the Bidi Rule.
// It is the only error this file creates itself; Span returns it for every
// rule violation it detects.
var ErrInvalid = errors.New("bidirule: failed Bidi Rule")
    49  
    50  type ruleState uint8
    51  
    52  const (
    53  	ruleInitial ruleState = iota
    54  	ruleLTR
    55  	ruleLTRFinal
    56  	ruleRTL
    57  	ruleRTLFinal
    58  	ruleInvalid
    59  )
    60  
// ruleTransition is one candidate edge of the Bidi Rule state machine. The
// transitions table initializes it positionally, so the field order matters.
type ruleTransition struct {
	next ruleState // state entered when this transition is taken
	mask uint16    // bit (1 << class) is set for every Bidi class that selects this transition
}
    65  
    66  var transitions = [...][2]ruleTransition{
    67  	// [2.1] The first character must be a character with Bidi property L, R, or
    68  	// AL. If it has the R or AL property, it is an RTL label; if it has the L
    69  	// property, it is an LTR label.
    70  	ruleInitial: {
    71  		{ruleLTRFinal, 1 << bidi.L},
    72  		{ruleRTLFinal, 1<<bidi.R | 1<<bidi.AL},
    73  	},
    74  	ruleRTL: {
    75  		// [2.3] In an RTL label, the end of the label must be a character with
    76  		// Bidi property R, AL, EN, or AN, followed by zero or more characters
    77  		// with Bidi property NSM.
    78  		{ruleRTLFinal, 1<<bidi.R | 1<<bidi.AL | 1<<bidi.EN | 1<<bidi.AN},
    79  
    80  		// [2.2] In an RTL label, only characters with the Bidi properties R,
    81  		// AL, AN, EN, ES, CS, ET, ON, BN, or NSM are allowed.
    82  		// We exclude the entries from [2.3]
    83  		{ruleRTL, 1<<bidi.ES | 1<<bidi.CS | 1<<bidi.ET | 1<<bidi.ON | 1<<bidi.BN | 1<<bidi.NSM},
    84  	},
    85  	ruleRTLFinal: {
    86  		// [2.3] In an RTL label, the end of the label must be a character with
    87  		// Bidi property R, AL, EN, or AN, followed by zero or more characters
    88  		// with Bidi property NSM.
    89  		{ruleRTLFinal, 1<<bidi.R | 1<<bidi.AL | 1<<bidi.EN | 1<<bidi.AN | 1<<bidi.NSM},
    90  
    91  		// [2.2] In an RTL label, only characters with the Bidi properties R,
    92  		// AL, AN, EN, ES, CS, ET, ON, BN, or NSM are allowed.
    93  		// We exclude the entries from [2.3] and NSM.
    94  		{ruleRTL, 1<<bidi.ES | 1<<bidi.CS | 1<<bidi.ET | 1<<bidi.ON | 1<<bidi.BN},
    95  	},
    96  	ruleLTR: {
    97  		// [2.6] In an LTR label, the end of the label must be a character with
    98  		// Bidi property L or EN, followed by zero or more characters with Bidi
    99  		// property NSM.
   100  		{ruleLTRFinal, 1<<bidi.L | 1<<bidi.EN},
   101  
   102  		// [2.5] In an LTR label, only characters with the Bidi properties L,
   103  		// EN, ES, CS, ET, ON, BN, or NSM are allowed.
   104  		// We exclude the entries from [2.6].
   105  		{ruleLTR, 1<<bidi.ES | 1<<bidi.CS | 1<<bidi.ET | 1<<bidi.ON | 1<<bidi.BN | 1<<bidi.NSM},
   106  	},
   107  	ruleLTRFinal: {
   108  		// [2.6] In an LTR label, the end of the label must be a character with
   109  		// Bidi property L or EN, followed by zero or more characters with Bidi
   110  		// property NSM.
   111  		{ruleLTRFinal, 1<<bidi.L | 1<<bidi.EN | 1<<bidi.NSM},
   112  
   113  		// [2.5] In an LTR label, only characters with the Bidi properties L,
   114  		// EN, ES, CS, ET, ON, BN, or NSM are allowed.
   115  		// We exclude the entries from [2.6].
   116  		{ruleLTR, 1<<bidi.ES | 1<<bidi.CS | 1<<bidi.ET | 1<<bidi.ON | 1<<bidi.BN},
   117  	},
   118  	ruleInvalid: {
   119  		{ruleInvalid, 0},
   120  		{ruleInvalid, 0},
   121  	},
   122  }
   123  
   124  // [2.4] In an RTL label, if an EN is present, no AN may be present, and
   125  // vice versa.
   126  const exclusiveRTL = uint16(1<<bidi.EN | 1<<bidi.AN)
   127  
   128  // From RFC 5893
   129  // An RTL label is a label that contains at least one character of type
   130  // R, AL, or AN.
   131  //
   132  // An LTR label is any label that is not an RTL label.
   133  
   134  // Direction reports the direction of the given label as defined by RFC 5893.
   135  // The Bidi Rule does not have to be applied to labels of the category
   136  // LeftToRight.
   137  func Direction(b []byte) bidi.Direction {
   138  	for i := 0; i < len(b); {
   139  		e, sz := bidi.Lookup(b[i:])
   140  		if sz == 0 {
   141  			i++
   142  		}
   143  		c := e.Class()
   144  		if c == bidi.R || c == bidi.AL || c == bidi.AN {
   145  			return bidi.RightToLeft
   146  		}
   147  		i += sz
   148  	}
   149  	return bidi.LeftToRight
   150  }
   151  
   152  // DirectionString reports the direction of the given label as defined by RFC
   153  // 5893. The Bidi Rule does not have to be applied to labels of the category
   154  // LeftToRight.
   155  func DirectionString(s string) bidi.Direction {
   156  	for i := 0; i < len(s); {
   157  		e, sz := bidi.LookupString(s[i:])
   158  		if sz == 0 {
   159  			i++
   160  			continue
   161  		}
   162  		c := e.Class()
   163  		if c == bidi.R || c == bidi.AL || c == bidi.AN {
   164  			return bidi.RightToLeft
   165  		}
   166  		i += sz
   167  	}
   168  	return bidi.LeftToRight
   169  }
   170  
   171  // Valid reports whether b conforms to the BiDi rule.
   172  func Valid(b []byte) bool {
   173  	var t Transformer
   174  	if n, ok := t.advance(b); !ok || n < len(b) {
   175  		return false
   176  	}
   177  	return t.isFinal()
   178  }
   179  
   180  // ValidString reports whether s conforms to the BiDi rule.
   181  func ValidString(s string) bool {
   182  	var t Transformer
   183  	if n, ok := t.advanceString(s); !ok || n < len(s) {
   184  		return false
   185  	}
   186  	return t.isFinal()
   187  }
   188  
   189  // New returns a Transformer that verifies that input adheres to the Bidi Rule.
   190  func New() *Transformer {
   191  	return &Transformer{}
   192  }
   193  
// Transformer implements transform.Transformer and additionally provides
// Span. It checks input against the Bidi Rule without modifying it. The zero
// value is ready to use; call Reset before reusing a Transformer for another
// label.
type Transformer struct {
	state  ruleState // current state of the transitions state machine
	hasRTL bool      // NOTE(review): not read or written anywhere in the visible code
	seen   uint16    // union of (1 << class) for every Bidi class consumed so far
}
   200  
   201  // A rule can only be violated for "Bidi Domain names", meaning if one of the
   202  // following categories has been observed.
   203  func (t *Transformer) isRTL() bool {
   204  	const isRTL = 1<<bidi.R | 1<<bidi.AL | 1<<bidi.AN
   205  	return t.seen&isRTL != 0
   206  }
   207  
   208  func (t *Transformer) isFinal() bool {
   209  	return t.state == ruleLTRFinal || t.state == ruleRTLFinal || t.state == ruleInitial
   210  }
   211  
   212  // Reset implements transform.Transformer.
   213  func (t *Transformer) Reset() { *t = Transformer{} }
   214  
   215  // Transform implements transform.Transformer. This Transformer has state and
   216  // needs to be reset between uses.
   217  func (t *Transformer) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
   218  	if len(dst) < len(src) {
   219  		src = src[:len(dst)]
   220  		atEOF = false
   221  		err = transform.ErrShortDst
   222  	}
   223  	n, err1 := t.Span(src, atEOF)
   224  	copy(dst, src[:n])
   225  	if err == nil || err1 != nil && err1 != transform.ErrShortSrc {
   226  		err = err1
   227  	}
   228  	return n, n, err
   229  }
   230  
   231  // Span returns the first n bytes of src that conform to the Bidi rule.
   232  func (t *Transformer) Span(src []byte, atEOF bool) (n int, err error) {
   233  	if t.state == ruleInvalid && t.isRTL() {
   234  		return 0, ErrInvalid
   235  	}
   236  	n, ok := t.advance(src)
   237  	switch {
   238  	case !ok:
   239  		err = ErrInvalid
   240  	case n < len(src):
   241  		if !atEOF {
   242  			err = transform.ErrShortSrc
   243  			break
   244  		}
   245  		err = ErrInvalid
   246  	case !t.isFinal():
   247  		err = ErrInvalid
   248  	}
   249  	return n, err
   250  }
   251  
   252  // Precomputing the ASCII values decreases running time for the ASCII fast path
   253  // by about 30%.
   254  var asciiTable [128]bidi.Properties
   255  
   256  func init() {
   257  	for i := range asciiTable {
   258  		p, _ := bidi.LookupRune(rune(i))
   259  		asciiTable[i] = p
   260  	}
   261  }
   262  
   263  func (t *Transformer) advance(s []byte) (n int, ok bool) {
   264  	var e bidi.Properties
   265  	var sz int
   266  	for n < len(s) {
   267  		if s[n] < utf8.RuneSelf {
   268  			e, sz = asciiTable[s[n]], 1
   269  		} else {
   270  			e, sz = bidi.Lookup(s[n:])
   271  			if sz <= 1 {
   272  				if sz == 1 {
   273  					// We always consider invalid UTF-8 to be invalid, even if
   274  					// the string has not yet been determined to be RTL.
   275  					// TODO: is this correct?
   276  					return n, false
   277  				}
   278  				return n, true // incomplete UTF-8 encoding
   279  			}
   280  		}
   281  		// TODO: using CompactClass would result in noticeable speedup.
   282  		// See unicode/bidi/prop.go:Properties.CompactClass.
   283  		c := uint16(1 << e.Class())
   284  		t.seen |= c
   285  		if t.seen&exclusiveRTL == exclusiveRTL {
   286  			t.state = ruleInvalid
   287  			return n, false
   288  		}
   289  		switch tr := transitions[t.state]; {
   290  		case tr[0].mask&c != 0:
   291  			t.state = tr[0].next
   292  		case tr[1].mask&c != 0:
   293  			t.state = tr[1].next
   294  		default:
   295  			t.state = ruleInvalid
   296  			if t.isRTL() {
   297  				return n, false
   298  			}
   299  		}
   300  		n += sz
   301  	}
   302  	return n, true
   303  }
   304  
   305  func (t *Transformer) advanceString(s string) (n int, ok bool) {
   306  	var e bidi.Properties
   307  	var sz int
   308  	for n < len(s) {
   309  		if s[n] < utf8.RuneSelf {
   310  			e, sz = asciiTable[s[n]], 1
   311  		} else {
   312  			e, sz = bidi.LookupString(s[n:])
   313  			if sz <= 1 {
   314  				if sz == 1 {
   315  					return n, false // invalid UTF-8
   316  				}
   317  				return n, true // incomplete UTF-8 encoding
   318  			}
   319  		}
   320  		// TODO: using CompactClass results in noticeable speedup.
   321  		// See unicode/bidi/prop.go:Properties.CompactClass.
   322  		c := uint16(1 << e.Class())
   323  		t.seen |= c
   324  		if t.seen&exclusiveRTL == exclusiveRTL {
   325  			t.state = ruleInvalid
   326  			return n, false
   327  		}
   328  		switch tr := transitions[t.state]; {
   329  		case tr[0].mask&c != 0:
   330  			t.state = tr[0].next
   331  		case tr[1].mask&c != 0:
   332  			t.state = tr[1].next
   333  		default:
   334  			t.state = ruleInvalid
   335  			if t.isRTL() {
   336  				return n, false
   337  			}
   338  		}
   339  		n += sz
   340  	}
   341  	return n, true
   342  }