golang.org/toolchain@v0.0.1-go1.9rc2.windows-amd64/src/vendor/golang_org/x/text/unicode/bidi/prop.go (about)

     1  // Code generated by running "go run gen.go -core" in golang.org/x/text. DO NOT EDIT.
     2  
     3  // Copyright 2016 The Go Authors. All rights reserved.
     4  // Use of this source code is governed by a BSD-style
     5  // license that can be found in the LICENSE file.
     6  
     7  package bidi
     8  
     9  import "unicode/utf8"
    10  
    11  // Properties provides access to BiDi properties of runes.
    12  type Properties struct {
    13  	entry uint8
    14  	last  uint8
    15  }
    16  
// trie is the package-wide lookup trie. Lookup and LookupString call its
// lookupValue method to turn the final byte of a multi-byte UTF-8 sequence
// into a table entry.
var trie = newBidiTrie(0)
    18  
    19  // TODO: using this for bidirule reduces the running time by about 5%. Consider
    20  // if this is worth exposing or if we can find a way to speed up the Class
    21  // method.
    22  //
    23  // // CompactClass is like Class, but maps all of the BiDi control classes
    24  // // (LRO, RLO, LRE, RLE, PDF, LRI, RLI, FSI, PDI) to the class Control.
    25  // func (p Properties) CompactClass() Class {
    26  // 	return Class(p.entry & 0x0F)
    27  // }
    28  
    29  // Class returns the Bidi class for p.
    30  func (p Properties) Class() Class {
    31  	c := Class(p.entry & 0x0F)
    32  	if c == Control {
    33  		c = controlByteToClass[p.last&0xF]
    34  	}
    35  	return c
    36  }
    37  
    38  // IsBracket reports whether the rune is a bracket.
    39  func (p Properties) IsBracket() bool { return p.entry&0xF0 != 0 }
    40  
    41  // IsOpeningBracket reports whether the rune is an opening bracket.
    42  // IsBracket must return true.
    43  func (p Properties) IsOpeningBracket() bool { return p.entry&openMask != 0 }
    44  
    45  // TODO: find a better API and expose.
    46  func (p Properties) reverseBracket(r rune) rune {
    47  	return xorMasks[p.entry>>xorMaskShift] ^ r
    48  }
    49  
    50  var controlByteToClass = [16]Class{
    51  	0xD: LRO, // U+202D LeftToRightOverride,
    52  	0xE: RLO, // U+202E RightToLeftOverride,
    53  	0xA: LRE, // U+202A LeftToRightEmbedding,
    54  	0xB: RLE, // U+202B RightToLeftEmbedding,
    55  	0xC: PDF, // U+202C PopDirectionalFormat,
    56  	0x6: LRI, // U+2066 LeftToRightIsolate,
    57  	0x7: RLI, // U+2067 RightToLeftIsolate,
    58  	0x8: FSI, // U+2068 FirstStrongIsolate,
    59  	0x9: PDI, // U+2069 PopDirectionalIsolate,
    60  }
    61  
    62  // LookupRune returns properties for r.
    63  func LookupRune(r rune) (p Properties, size int) {
    64  	var buf [4]byte
    65  	n := utf8.EncodeRune(buf[:], r)
    66  	return Lookup(buf[:n])
    67  }
    68  
    69  // TODO: these lookup methods are based on the generated trie code. The returned
    70  // sizes have slightly different semantics from the generated code, in that it
    71  // always returns size==1 for an illegal UTF-8 byte (instead of the length
    72  // of the maximum invalid subsequence). Most Transformers, like unicode/norm,
    73  // leave invalid UTF-8 untouched, in which case it has performance benefits to
    74  // do so (without changing the semantics). Bidi requires the semantics used here
    75  // for the bidirule implementation to be compatible with the Go semantics.
// They ultimately should perhaps be adopted by all trie implementations, for
// convenience's sake.
    78  // This unrolled code also boosts performance of the secure/bidirule package by
    79  // about 30%.
    80  // So, to remove this code:
    81  //   - add option to trie generator to define return type.
    82  //   - always return 1 byte size for ill-formed UTF-8 runes.
    83  
    84  // Lookup returns properties for the first rune in s and the width in bytes of
    85  // its encoding. The size will be 0 if s does not hold enough bytes to complete
    86  // the encoding.
    87  func Lookup(s []byte) (p Properties, sz int) {
    88  	c0 := s[0]
    89  	switch {
    90  	case c0 < 0x80: // is ASCII
    91  		return Properties{entry: bidiValues[c0]}, 1
    92  	case c0 < 0xC2:
    93  		return Properties{}, 1
    94  	case c0 < 0xE0: // 2-byte UTF-8
    95  		if len(s) < 2 {
    96  			return Properties{}, 0
    97  		}
    98  		i := bidiIndex[c0]
    99  		c1 := s[1]
   100  		if c1 < 0x80 || 0xC0 <= c1 {
   101  			return Properties{}, 1
   102  		}
   103  		return Properties{entry: trie.lookupValue(uint32(i), c1)}, 2
   104  	case c0 < 0xF0: // 3-byte UTF-8
   105  		if len(s) < 3 {
   106  			return Properties{}, 0
   107  		}
   108  		i := bidiIndex[c0]
   109  		c1 := s[1]
   110  		if c1 < 0x80 || 0xC0 <= c1 {
   111  			return Properties{}, 1
   112  		}
   113  		o := uint32(i)<<6 + uint32(c1)
   114  		i = bidiIndex[o]
   115  		c2 := s[2]
   116  		if c2 < 0x80 || 0xC0 <= c2 {
   117  			return Properties{}, 1
   118  		}
   119  		return Properties{entry: trie.lookupValue(uint32(i), c2), last: c2}, 3
   120  	case c0 < 0xF8: // 4-byte UTF-8
   121  		if len(s) < 4 {
   122  			return Properties{}, 0
   123  		}
   124  		i := bidiIndex[c0]
   125  		c1 := s[1]
   126  		if c1 < 0x80 || 0xC0 <= c1 {
   127  			return Properties{}, 1
   128  		}
   129  		o := uint32(i)<<6 + uint32(c1)
   130  		i = bidiIndex[o]
   131  		c2 := s[2]
   132  		if c2 < 0x80 || 0xC0 <= c2 {
   133  			return Properties{}, 1
   134  		}
   135  		o = uint32(i)<<6 + uint32(c2)
   136  		i = bidiIndex[o]
   137  		c3 := s[3]
   138  		if c3 < 0x80 || 0xC0 <= c3 {
   139  			return Properties{}, 1
   140  		}
   141  		return Properties{entry: trie.lookupValue(uint32(i), c3)}, 4
   142  	}
   143  	// Illegal rune
   144  	return Properties{}, 1
   145  }
   146  
   147  // LookupString returns properties for the first rune in s and the width in
   148  // bytes of its encoding. The size will be 0 if s does not hold enough bytes to
   149  // complete the encoding.
   150  func LookupString(s string) (p Properties, sz int) {
   151  	c0 := s[0]
   152  	switch {
   153  	case c0 < 0x80: // is ASCII
   154  		return Properties{entry: bidiValues[c0]}, 1
   155  	case c0 < 0xC2:
   156  		return Properties{}, 1
   157  	case c0 < 0xE0: // 2-byte UTF-8
   158  		if len(s) < 2 {
   159  			return Properties{}, 0
   160  		}
   161  		i := bidiIndex[c0]
   162  		c1 := s[1]
   163  		if c1 < 0x80 || 0xC0 <= c1 {
   164  			return Properties{}, 1
   165  		}
   166  		return Properties{entry: trie.lookupValue(uint32(i), c1)}, 2
   167  	case c0 < 0xF0: // 3-byte UTF-8
   168  		if len(s) < 3 {
   169  			return Properties{}, 0
   170  		}
   171  		i := bidiIndex[c0]
   172  		c1 := s[1]
   173  		if c1 < 0x80 || 0xC0 <= c1 {
   174  			return Properties{}, 1
   175  		}
   176  		o := uint32(i)<<6 + uint32(c1)
   177  		i = bidiIndex[o]
   178  		c2 := s[2]
   179  		if c2 < 0x80 || 0xC0 <= c2 {
   180  			return Properties{}, 1
   181  		}
   182  		return Properties{entry: trie.lookupValue(uint32(i), c2), last: c2}, 3
   183  	case c0 < 0xF8: // 4-byte UTF-8
   184  		if len(s) < 4 {
   185  			return Properties{}, 0
   186  		}
   187  		i := bidiIndex[c0]
   188  		c1 := s[1]
   189  		if c1 < 0x80 || 0xC0 <= c1 {
   190  			return Properties{}, 1
   191  		}
   192  		o := uint32(i)<<6 + uint32(c1)
   193  		i = bidiIndex[o]
   194  		c2 := s[2]
   195  		if c2 < 0x80 || 0xC0 <= c2 {
   196  			return Properties{}, 1
   197  		}
   198  		o = uint32(i)<<6 + uint32(c2)
   199  		i = bidiIndex[o]
   200  		c3 := s[3]
   201  		if c3 < 0x80 || 0xC0 <= c3 {
   202  			return Properties{}, 1
   203  		}
   204  		return Properties{entry: trie.lookupValue(uint32(i), c3)}, 4
   205  	}
   206  	// Illegal rune
   207  	return Properties{}, 1
   208  }