github.com/go-enjin/golang-org-x-text@v0.12.1-enjin.2/internal/triegen/data_test.go (about)

     1  // This file is generated with "go test -tags generate". DO NOT EDIT!
     2  //go:build !generate
     3  // +build !generate
     4  
     5  package triegen_test
     6  
     7  // lookup returns the trie value for the first UTF-8 encoding in s and
     8  // the width in bytes of this encoding. The size will be 0 if s does not
     9  // hold enough bytes to complete the encoding. len(s) must be greater than 0.
    10  func (t *randTrie) lookup(s []byte) (v uint8, sz int) {
    11  	c0 := s[0]
    12  	switch {
    13  	case c0 < 0x80: // is ASCII
    14  		return randValues[c0], 1
    15  	case c0 < 0xC2:
    16  		return 0, 1 // Illegal UTF-8: not a starter, not ASCII.
    17  	case c0 < 0xE0: // 2-byte UTF-8
    18  		if len(s) < 2 {
    19  			return 0, 0
    20  		}
    21  		i := randIndex[c0]
    22  		c1 := s[1]
    23  		if c1 < 0x80 || 0xC0 <= c1 {
    24  			return 0, 1 // Illegal UTF-8: not a continuation byte.
    25  		}
    26  		return t.lookupValue(uint32(i), c1), 2
    27  	case c0 < 0xF0: // 3-byte UTF-8
    28  		if len(s) < 3 {
    29  			return 0, 0
    30  		}
    31  		i := randIndex[c0]
    32  		c1 := s[1]
    33  		if c1 < 0x80 || 0xC0 <= c1 {
    34  			return 0, 1 // Illegal UTF-8: not a continuation byte.
    35  		}
    36  		o := uint32(i)<<6 + uint32(c1)
    37  		i = randIndex[o]
    38  		c2 := s[2]
    39  		if c2 < 0x80 || 0xC0 <= c2 {
    40  			return 0, 2 // Illegal UTF-8: not a continuation byte.
    41  		}
    42  		return t.lookupValue(uint32(i), c2), 3
    43  	case c0 < 0xF8: // 4-byte UTF-8
    44  		if len(s) < 4 {
    45  			return 0, 0
    46  		}
    47  		i := randIndex[c0]
    48  		c1 := s[1]
    49  		if c1 < 0x80 || 0xC0 <= c1 {
    50  			return 0, 1 // Illegal UTF-8: not a continuation byte.
    51  		}
    52  		o := uint32(i)<<6 + uint32(c1)
    53  		i = randIndex[o]
    54  		c2 := s[2]
    55  		if c2 < 0x80 || 0xC0 <= c2 {
    56  			return 0, 2 // Illegal UTF-8: not a continuation byte.
    57  		}
    58  		o = uint32(i)<<6 + uint32(c2)
    59  		i = randIndex[o]
    60  		c3 := s[3]
    61  		if c3 < 0x80 || 0xC0 <= c3 {
    62  			return 0, 3 // Illegal UTF-8: not a continuation byte.
    63  		}
    64  		return t.lookupValue(uint32(i), c3), 4
    65  	}
    66  	// Illegal rune
    67  	return 0, 1
    68  }
    69  
    70  // lookupUnsafe returns the trie value for the first UTF-8 encoding in s.
    71  // s must start with a full and valid UTF-8 encoded rune.
    72  func (t *randTrie) lookupUnsafe(s []byte) uint8 {
    73  	c0 := s[0]
    74  	if c0 < 0x80 { // is ASCII
    75  		return randValues[c0]
    76  	}
    77  	i := randIndex[c0]
    78  	if c0 < 0xE0 { // 2-byte UTF-8
    79  		return t.lookupValue(uint32(i), s[1])
    80  	}
    81  	i = randIndex[uint32(i)<<6+uint32(s[1])]
    82  	if c0 < 0xF0 { // 3-byte UTF-8
    83  		return t.lookupValue(uint32(i), s[2])
    84  	}
    85  	i = randIndex[uint32(i)<<6+uint32(s[2])]
    86  	if c0 < 0xF8 { // 4-byte UTF-8
    87  		return t.lookupValue(uint32(i), s[3])
    88  	}
    89  	return 0
    90  }
    91  
    92  // lookupString returns the trie value for the first UTF-8 encoding in s and
    93  // the width in bytes of this encoding. The size will be 0 if s does not
    94  // hold enough bytes to complete the encoding. len(s) must be greater than 0.
    95  func (t *randTrie) lookupString(s string) (v uint8, sz int) {
    96  	c0 := s[0]
    97  	switch {
    98  	case c0 < 0x80: // is ASCII
    99  		return randValues[c0], 1
   100  	case c0 < 0xC2:
   101  		return 0, 1 // Illegal UTF-8: not a starter, not ASCII.
   102  	case c0 < 0xE0: // 2-byte UTF-8
   103  		if len(s) < 2 {
   104  			return 0, 0
   105  		}
   106  		i := randIndex[c0]
   107  		c1 := s[1]
   108  		if c1 < 0x80 || 0xC0 <= c1 {
   109  			return 0, 1 // Illegal UTF-8: not a continuation byte.
   110  		}
   111  		return t.lookupValue(uint32(i), c1), 2
   112  	case c0 < 0xF0: // 3-byte UTF-8
   113  		if len(s) < 3 {
   114  			return 0, 0
   115  		}
   116  		i := randIndex[c0]
   117  		c1 := s[1]
   118  		if c1 < 0x80 || 0xC0 <= c1 {
   119  			return 0, 1 // Illegal UTF-8: not a continuation byte.
   120  		}
   121  		o := uint32(i)<<6 + uint32(c1)
   122  		i = randIndex[o]
   123  		c2 := s[2]
   124  		if c2 < 0x80 || 0xC0 <= c2 {
   125  			return 0, 2 // Illegal UTF-8: not a continuation byte.
   126  		}
   127  		return t.lookupValue(uint32(i), c2), 3
   128  	case c0 < 0xF8: // 4-byte UTF-8
   129  		if len(s) < 4 {
   130  			return 0, 0
   131  		}
   132  		i := randIndex[c0]
   133  		c1 := s[1]
   134  		if c1 < 0x80 || 0xC0 <= c1 {
   135  			return 0, 1 // Illegal UTF-8: not a continuation byte.
   136  		}
   137  		o := uint32(i)<<6 + uint32(c1)
   138  		i = randIndex[o]
   139  		c2 := s[2]
   140  		if c2 < 0x80 || 0xC0 <= c2 {
   141  			return 0, 2 // Illegal UTF-8: not a continuation byte.
   142  		}
   143  		o = uint32(i)<<6 + uint32(c2)
   144  		i = randIndex[o]
   145  		c3 := s[3]
   146  		if c3 < 0x80 || 0xC0 <= c3 {
   147  			return 0, 3 // Illegal UTF-8: not a continuation byte.
   148  		}
   149  		return t.lookupValue(uint32(i), c3), 4
   150  	}
   151  	// Illegal rune
   152  	return 0, 1
   153  }
   154  
   155  // lookupStringUnsafe returns the trie value for the first UTF-8 encoding in s.
   156  // s must start with a full and valid UTF-8 encoded rune.
   157  func (t *randTrie) lookupStringUnsafe(s string) uint8 {
   158  	c0 := s[0]
   159  	if c0 < 0x80 { // is ASCII
   160  		return randValues[c0]
   161  	}
   162  	i := randIndex[c0]
   163  	if c0 < 0xE0 { // 2-byte UTF-8
   164  		return t.lookupValue(uint32(i), s[1])
   165  	}
   166  	i = randIndex[uint32(i)<<6+uint32(s[1])]
   167  	if c0 < 0xF0 { // 3-byte UTF-8
   168  		return t.lookupValue(uint32(i), s[2])
   169  	}
   170  	i = randIndex[uint32(i)<<6+uint32(s[2])]
   171  	if c0 < 0xF8 { // 4-byte UTF-8
   172  		return t.lookupValue(uint32(i), s[3])
   173  	}
   174  	return 0
   175  }
   176  
// randTrie. Total size: 9280 bytes (9.06 KiB). Checksum: 6debd324a8debb8f.
//
// randTrie carries no state of its own: its methods read the package-level
// randValues and randIndex tables, whose 3584 and 5696 bytes add up to the
// total size given above.
type randTrie struct{}
   179  
   180  func newRandTrie(i int) *randTrie {
   181  	return &randTrie{}
   182  }
   183  
   184  // lookupValue determines the type of block n and looks up the value for b.
   185  func (t *randTrie) lookupValue(n uint32, b byte) uint8 {
   186  	switch {
   187  	default:
   188  		return uint8(randValues[n<<6+uint32(b)])
   189  	}
   190  }
   191  
// randValues: 56 blocks, 3584 entries, 3584 bytes
// The third block is the zero block.
//
// Each block holds 64 entries, so block n starts at offset n<<6, which is how
// randTrie.lookupValue addresses it. The lookup methods also read the first
// 128 entries directly for ASCII bytes. Entries not listed below are 0.
var randValues = [3584]uint8{
	// Block 0x0, offset 0x0
	// Block 0x1, offset 0x40
	// Block 0x2, offset 0x80
	// Block 0x3, offset 0xc0
	0xc9: 0x0001,
	// Block 0x4, offset 0x100
	0x100: 0x0001,
	// Block 0x5, offset 0x140
	0x155: 0x0001,
	// Block 0x6, offset 0x180
	0x196: 0x0001,
	// Block 0x7, offset 0x1c0
	0x1ef: 0x0001,
	// Block 0x8, offset 0x200
	0x206: 0x0001,
	// Block 0x9, offset 0x240
	0x258: 0x0001,
	// Block 0xa, offset 0x280
	0x288: 0x0001,
	// Block 0xb, offset 0x2c0
	0x2f2: 0x0001,
	// Block 0xc, offset 0x300
	0x304: 0x0001,
	// Block 0xd, offset 0x340
	0x34b: 0x0001,
	// Block 0xe, offset 0x380
	0x3ba: 0x0001,
	// Block 0xf, offset 0x3c0
	0x3f5: 0x0001,
	// Block 0x10, offset 0x400
	0x41d: 0x0001,
	// Block 0x11, offset 0x440
	0x442: 0x0001,
	// Block 0x12, offset 0x480
	0x4bb: 0x0001,
	// Block 0x13, offset 0x4c0
	0x4e9: 0x0001,
	// Block 0x14, offset 0x500
	0x53e: 0x0001,
	// Block 0x15, offset 0x540
	0x55f: 0x0001,
	// Block 0x16, offset 0x580
	0x5b7: 0x0001,
	// Block 0x17, offset 0x5c0
	0x5d9: 0x0001,
	// Block 0x18, offset 0x600
	0x60e: 0x0001,
	// Block 0x19, offset 0x640
	0x652: 0x0001,
	// Block 0x1a, offset 0x680
	0x68f: 0x0001,
	// Block 0x1b, offset 0x6c0
	0x6dc: 0x0001,
	// Block 0x1c, offset 0x700
	0x703: 0x0001,
	// Block 0x1d, offset 0x740
	0x741: 0x0001,
	// Block 0x1e, offset 0x780
	0x79b: 0x0001,
	// Block 0x1f, offset 0x7c0
	0x7f1: 0x0001,
	// Block 0x20, offset 0x800
	0x833: 0x0001,
	// Block 0x21, offset 0x840
	0x853: 0x0001,
	// Block 0x22, offset 0x880
	0x8a2: 0x0001,
	// Block 0x23, offset 0x8c0
	0x8f8: 0x0001,
	// Block 0x24, offset 0x900
	0x917: 0x0001,
	// Block 0x25, offset 0x940
	0x945: 0x0001,
	// Block 0x26, offset 0x980
	0x99e: 0x0001,
	// Block 0x27, offset 0x9c0
	0x9fd: 0x0001,
	// Block 0x28, offset 0xa00
	0xa0d: 0x0001,
	// Block 0x29, offset 0xa40
	0xa66: 0x0001,
	// Block 0x2a, offset 0xa80
	0xaab: 0x0001,
	// Block 0x2b, offset 0xac0
	0xaea: 0x0001,
	// Block 0x2c, offset 0xb00
	0xb2d: 0x0001,
	// Block 0x2d, offset 0xb40
	0xb54: 0x0001,
	// Block 0x2e, offset 0xb80
	0xb90: 0x0001,
	// Block 0x2f, offset 0xbc0
	0xbe5: 0x0001,
	// Block 0x30, offset 0xc00
	0xc28: 0x0001,
	// Block 0x31, offset 0xc40
	0xc7c: 0x0001,
	// Block 0x32, offset 0xc80
	0xcbf: 0x0001,
	// Block 0x33, offset 0xcc0
	0xcc7: 0x0001,
	// Block 0x34, offset 0xd00
	0xd34: 0x0001,
	// Block 0x35, offset 0xd40
	0xd61: 0x0001,
	// Block 0x36, offset 0xd80
	0xdb9: 0x0001,
	// Block 0x37, offset 0xdc0
	0xdda: 0x0001,
}
   305  
// randIndex: 89 blocks, 5696 entries, 5696 bytes
// Block 0 is the zero block.
//
// Each block holds 64 entries and every entry is a block number. The lookup
// methods of randTrie first index the table with the lead byte of a multi-byte
// encoding and then, for each following byte c that is not the last one, with
// n<<6+c, where n is the block number found in the previous step. Entries not
// listed below are 0.
var randIndex = [5696]uint8{
	// Block 0x0, offset 0x0
	// Block 0x1, offset 0x40
	// Block 0x2, offset 0x80
	// Block 0x3, offset 0xc0
	0xe1: 0x02, 0xe3: 0x03, 0xe4: 0x04,
	0xea: 0x05, 0xeb: 0x06, 0xec: 0x07,
	0xf0: 0x10, 0xf1: 0x24, 0xf2: 0x3d, 0xf3: 0x4f, 0xf4: 0x56,
	// Block 0x4, offset 0x100
	0x107: 0x01,
	// Block 0x5, offset 0x140
	0x16c: 0x02,
	// Block 0x6, offset 0x180
	0x19c: 0x03,
	0x1ae: 0x04,
	// Block 0x7, offset 0x1c0
	0x1d8: 0x05,
	0x1f7: 0x06,
	// Block 0x8, offset 0x200
	0x20c: 0x07,
	// Block 0x9, offset 0x240
	0x24a: 0x08,
	// Block 0xa, offset 0x280
	0x2b6: 0x09,
	// Block 0xb, offset 0x2c0
	0x2d5: 0x0a,
	// Block 0xc, offset 0x300
	0x31a: 0x0b,
	// Block 0xd, offset 0x340
	0x373: 0x0c,
	// Block 0xe, offset 0x380
	0x38b: 0x0d,
	// Block 0xf, offset 0x3c0
	0x3f0: 0x0e,
	// Block 0x10, offset 0x400
	0x433: 0x0f,
	// Block 0x11, offset 0x440
	0x45d: 0x10,
	// Block 0x12, offset 0x480
	0x491: 0x08, 0x494: 0x09, 0x497: 0x0a,
	0x49b: 0x0b, 0x49c: 0x0c,
	0x4a1: 0x0d,
	0x4ad: 0x0e,
	0x4ba: 0x0f,
	// Block 0x13, offset 0x4c0
	0x4c1: 0x11,
	// Block 0x14, offset 0x500
	0x531: 0x12,
	// Block 0x15, offset 0x540
	0x546: 0x13,
	// Block 0x16, offset 0x580
	0x5ab: 0x14,
	// Block 0x17, offset 0x5c0
	0x5d4: 0x11,
	0x5fe: 0x11,
	// Block 0x18, offset 0x600
	0x618: 0x0a,
	// Block 0x19, offset 0x640
	0x65b: 0x15,
	// Block 0x1a, offset 0x680
	0x6a0: 0x16,
	// Block 0x1b, offset 0x6c0
	0x6d2: 0x17,
	0x6f6: 0x18,
	// Block 0x1c, offset 0x700
	0x711: 0x19,
	// Block 0x1d, offset 0x740
	0x768: 0x1a,
	// Block 0x1e, offset 0x780
	0x783: 0x1b,
	// Block 0x1f, offset 0x7c0
	0x7f9: 0x1c,
	// Block 0x20, offset 0x800
	0x831: 0x1d,
	// Block 0x21, offset 0x840
	0x85e: 0x1e,
	// Block 0x22, offset 0x880
	0x898: 0x1f,
	// Block 0x23, offset 0x8c0
	0x8c7: 0x18,
	0x8d5: 0x14,
	0x8f7: 0x20,
	0x8fe: 0x1f,
	// Block 0x24, offset 0x900
	0x905: 0x21,
	// Block 0x25, offset 0x940
	0x966: 0x03,
	// Block 0x26, offset 0x980
	0x981: 0x07, 0x983: 0x11,
	0x989: 0x12, 0x98a: 0x13, 0x98e: 0x14, 0x98f: 0x15,
	0x992: 0x16, 0x995: 0x17, 0x996: 0x18,
	0x998: 0x19, 0x999: 0x1a, 0x99b: 0x1b, 0x99f: 0x1c,
	0x9a3: 0x1d,
	0x9ad: 0x1e, 0x9af: 0x1f,
	0x9b0: 0x20, 0x9b1: 0x21,
	0x9b8: 0x22, 0x9bd: 0x23,
	// Block 0x27, offset 0x9c0
	0x9cd: 0x22,
	// Block 0x28, offset 0xa00
	0xa0c: 0x08,
	// Block 0x29, offset 0xa40
	0xa6f: 0x1c,
	// Block 0x2a, offset 0xa80
	0xa90: 0x1a,
	0xaaf: 0x23,
	// Block 0x2b, offset 0xac0
	0xae3: 0x19,
	0xae8: 0x24,
	0xafc: 0x25,
	// Block 0x2c, offset 0xb00
	0xb13: 0x26,
	// Block 0x2d, offset 0xb40
	0xb67: 0x1c,
	// Block 0x2e, offset 0xb80
	0xb8f: 0x0b,
	// Block 0x2f, offset 0xbc0
	0xbcb: 0x27,
	0xbe7: 0x26,
	// Block 0x30, offset 0xc00
	0xc34: 0x16,
	// Block 0x31, offset 0xc40
	0xc62: 0x03,
	// Block 0x32, offset 0xc80
	0xcbb: 0x12,
	// Block 0x33, offset 0xcc0
	0xcdf: 0x09,
	// Block 0x34, offset 0xd00
	0xd34: 0x0a,
	// Block 0x35, offset 0xd40
	0xd41: 0x1e,
	// Block 0x36, offset 0xd80
	0xd83: 0x28,
	// Block 0x37, offset 0xdc0
	0xdc0: 0x15,
	// Block 0x38, offset 0xe00
	0xe1a: 0x15,
	// Block 0x39, offset 0xe40
	0xe65: 0x29,
	// Block 0x3a, offset 0xe80
	0xe86: 0x1f,
	// Block 0x3b, offset 0xec0
	0xeec: 0x18,
	// Block 0x3c, offset 0xf00
	0xf28: 0x2a,
	// Block 0x3d, offset 0xf40
	0xf53: 0x08,
	// Block 0x3e, offset 0xf80
	0xfa2: 0x2b,
	0xfaa: 0x17,
	// Block 0x3f, offset 0xfc0
	0xfc0: 0x25, 0xfc2: 0x26,
	0xfc9: 0x27, 0xfcd: 0x28, 0xfce: 0x29,
	0xfd5: 0x2a,
	0xfd8: 0x2b, 0xfd9: 0x2c, 0xfdf: 0x2d,
	0xfe1: 0x2e, 0xfe2: 0x2f, 0xfe3: 0x30, 0xfe6: 0x31,
	0xfe9: 0x32, 0xfec: 0x33, 0xfed: 0x34, 0xfef: 0x35,
	0xff1: 0x36, 0xff2: 0x37, 0xff3: 0x38, 0xff4: 0x39,
	0xffa: 0x3a, 0xffc: 0x3b, 0xffe: 0x3c,
	// Block 0x40, offset 0x1000
	0x102c: 0x2c,
	// Block 0x41, offset 0x1040
	0x1074: 0x2c,
	// Block 0x42, offset 0x1080
	0x108c: 0x08,
	0x10a0: 0x2d,
	// Block 0x43, offset 0x10c0
	0x10e8: 0x10,
	// Block 0x44, offset 0x1100
	0x110f: 0x13,
	// Block 0x45, offset 0x1140
	0x114b: 0x2e,
	// Block 0x46, offset 0x1180
	0x118b: 0x23,
	0x119d: 0x0c,
	// Block 0x47, offset 0x11c0
	0x11c3: 0x12,
	0x11f9: 0x0f,
	// Block 0x48, offset 0x1200
	0x121e: 0x1b,
	// Block 0x49, offset 0x1240
	0x1270: 0x2f,
	// Block 0x4a, offset 0x1280
	0x128a: 0x1b,
	0x12a7: 0x02,
	// Block 0x4b, offset 0x12c0
	0x12fb: 0x14,
	// Block 0x4c, offset 0x1300
	0x1333: 0x30,
	// Block 0x4d, offset 0x1340
	0x134d: 0x31,
	// Block 0x4e, offset 0x1380
	0x138e: 0x15,
	// Block 0x4f, offset 0x13c0
	0x13f4: 0x32,
	// Block 0x50, offset 0x1400
	0x141b: 0x33,
	// Block 0x51, offset 0x1440
	0x1448: 0x3e, 0x1449: 0x3f, 0x144a: 0x40, 0x144f: 0x41,
	0x1459: 0x42, 0x145c: 0x43, 0x145e: 0x44, 0x145f: 0x45,
	0x1468: 0x46, 0x1469: 0x47, 0x146c: 0x48, 0x146d: 0x49, 0x146e: 0x4a,
	0x1472: 0x4b, 0x1473: 0x4c,
	0x1479: 0x4d, 0x147b: 0x4e,
	// Block 0x52, offset 0x1480
	0x1480: 0x34,
	0x1499: 0x11,
	0x14b6: 0x2c,
	// Block 0x53, offset 0x14c0
	0x14e4: 0x0d,
	// Block 0x54, offset 0x1500
	0x1527: 0x08,
	// Block 0x55, offset 0x1540
	0x1555: 0x2b,
	// Block 0x56, offset 0x1580
	0x15b2: 0x35,
	// Block 0x57, offset 0x15c0
	0x15f2: 0x1c, 0x15f4: 0x29,
	// Block 0x58, offset 0x1600
	0x1600: 0x50, 0x1603: 0x51,
	0x1608: 0x52, 0x160a: 0x53, 0x160d: 0x54, 0x160e: 0x55,
}
   528  
   529  // lookup returns the trie value for the first UTF-8 encoding in s and
   530  // the width in bytes of this encoding. The size will be 0 if s does not
   531  // hold enough bytes to complete the encoding. len(s) must be greater than 0.
   532  func (t *multiTrie) lookup(s []byte) (v uint64, sz int) {
   533  	c0 := s[0]
   534  	switch {
   535  	case c0 < 0x80: // is ASCII
   536  		return t.ascii[c0], 1
   537  	case c0 < 0xC2:
   538  		return 0, 1 // Illegal UTF-8: not a starter, not ASCII.
   539  	case c0 < 0xE0: // 2-byte UTF-8
   540  		if len(s) < 2 {
   541  			return 0, 0
   542  		}
   543  		i := t.utf8Start[c0]
   544  		c1 := s[1]
   545  		if c1 < 0x80 || 0xC0 <= c1 {
   546  			return 0, 1 // Illegal UTF-8: not a continuation byte.
   547  		}
   548  		return t.lookupValue(uint32(i), c1), 2
   549  	case c0 < 0xF0: // 3-byte UTF-8
   550  		if len(s) < 3 {
   551  			return 0, 0
   552  		}
   553  		i := t.utf8Start[c0]
   554  		c1 := s[1]
   555  		if c1 < 0x80 || 0xC0 <= c1 {
   556  			return 0, 1 // Illegal UTF-8: not a continuation byte.
   557  		}
   558  		o := uint32(i)<<6 + uint32(c1)
   559  		i = multiIndex[o]
   560  		c2 := s[2]
   561  		if c2 < 0x80 || 0xC0 <= c2 {
   562  			return 0, 2 // Illegal UTF-8: not a continuation byte.
   563  		}
   564  		return t.lookupValue(uint32(i), c2), 3
   565  	case c0 < 0xF8: // 4-byte UTF-8
   566  		if len(s) < 4 {
   567  			return 0, 0
   568  		}
   569  		i := t.utf8Start[c0]
   570  		c1 := s[1]
   571  		if c1 < 0x80 || 0xC0 <= c1 {
   572  			return 0, 1 // Illegal UTF-8: not a continuation byte.
   573  		}
   574  		o := uint32(i)<<6 + uint32(c1)
   575  		i = multiIndex[o]
   576  		c2 := s[2]
   577  		if c2 < 0x80 || 0xC0 <= c2 {
   578  			return 0, 2 // Illegal UTF-8: not a continuation byte.
   579  		}
   580  		o = uint32(i)<<6 + uint32(c2)
   581  		i = multiIndex[o]
   582  		c3 := s[3]
   583  		if c3 < 0x80 || 0xC0 <= c3 {
   584  			return 0, 3 // Illegal UTF-8: not a continuation byte.
   585  		}
   586  		return t.lookupValue(uint32(i), c3), 4
   587  	}
   588  	// Illegal rune
   589  	return 0, 1
   590  }
   591  
   592  // lookupUnsafe returns the trie value for the first UTF-8 encoding in s.
   593  // s must start with a full and valid UTF-8 encoded rune.
   594  func (t *multiTrie) lookupUnsafe(s []byte) uint64 {
   595  	c0 := s[0]
   596  	if c0 < 0x80 { // is ASCII
   597  		return t.ascii[c0]
   598  	}
   599  	i := t.utf8Start[c0]
   600  	if c0 < 0xE0 { // 2-byte UTF-8
   601  		return t.lookupValue(uint32(i), s[1])
   602  	}
   603  	i = multiIndex[uint32(i)<<6+uint32(s[1])]
   604  	if c0 < 0xF0 { // 3-byte UTF-8
   605  		return t.lookupValue(uint32(i), s[2])
   606  	}
   607  	i = multiIndex[uint32(i)<<6+uint32(s[2])]
   608  	if c0 < 0xF8 { // 4-byte UTF-8
   609  		return t.lookupValue(uint32(i), s[3])
   610  	}
   611  	return 0
   612  }
   613  
   614  // lookupString returns the trie value for the first UTF-8 encoding in s and
   615  // the width in bytes of this encoding. The size will be 0 if s does not
   616  // hold enough bytes to complete the encoding. len(s) must be greater than 0.
   617  func (t *multiTrie) lookupString(s string) (v uint64, sz int) {
   618  	c0 := s[0]
   619  	switch {
   620  	case c0 < 0x80: // is ASCII
   621  		return t.ascii[c0], 1
   622  	case c0 < 0xC2:
   623  		return 0, 1 // Illegal UTF-8: not a starter, not ASCII.
   624  	case c0 < 0xE0: // 2-byte UTF-8
   625  		if len(s) < 2 {
   626  			return 0, 0
   627  		}
   628  		i := t.utf8Start[c0]
   629  		c1 := s[1]
   630  		if c1 < 0x80 || 0xC0 <= c1 {
   631  			return 0, 1 // Illegal UTF-8: not a continuation byte.
   632  		}
   633  		return t.lookupValue(uint32(i), c1), 2
   634  	case c0 < 0xF0: // 3-byte UTF-8
   635  		if len(s) < 3 {
   636  			return 0, 0
   637  		}
   638  		i := t.utf8Start[c0]
   639  		c1 := s[1]
   640  		if c1 < 0x80 || 0xC0 <= c1 {
   641  			return 0, 1 // Illegal UTF-8: not a continuation byte.
   642  		}
   643  		o := uint32(i)<<6 + uint32(c1)
   644  		i = multiIndex[o]
   645  		c2 := s[2]
   646  		if c2 < 0x80 || 0xC0 <= c2 {
   647  			return 0, 2 // Illegal UTF-8: not a continuation byte.
   648  		}
   649  		return t.lookupValue(uint32(i), c2), 3
   650  	case c0 < 0xF8: // 4-byte UTF-8
   651  		if len(s) < 4 {
   652  			return 0, 0
   653  		}
   654  		i := t.utf8Start[c0]
   655  		c1 := s[1]
   656  		if c1 < 0x80 || 0xC0 <= c1 {
   657  			return 0, 1 // Illegal UTF-8: not a continuation byte.
   658  		}
   659  		o := uint32(i)<<6 + uint32(c1)
   660  		i = multiIndex[o]
   661  		c2 := s[2]
   662  		if c2 < 0x80 || 0xC0 <= c2 {
   663  			return 0, 2 // Illegal UTF-8: not a continuation byte.
   664  		}
   665  		o = uint32(i)<<6 + uint32(c2)
   666  		i = multiIndex[o]
   667  		c3 := s[3]
   668  		if c3 < 0x80 || 0xC0 <= c3 {
   669  			return 0, 3 // Illegal UTF-8: not a continuation byte.
   670  		}
   671  		return t.lookupValue(uint32(i), c3), 4
   672  	}
   673  	// Illegal rune
   674  	return 0, 1
   675  }
   676  
   677  // lookupStringUnsafe returns the trie value for the first UTF-8 encoding in s.
   678  // s must start with a full and valid UTF-8 encoded rune.
   679  func (t *multiTrie) lookupStringUnsafe(s string) uint64 {
   680  	c0 := s[0]
   681  	if c0 < 0x80 { // is ASCII
   682  		return t.ascii[c0]
   683  	}
   684  	i := t.utf8Start[c0]
   685  	if c0 < 0xE0 { // 2-byte UTF-8
   686  		return t.lookupValue(uint32(i), s[1])
   687  	}
   688  	i = multiIndex[uint32(i)<<6+uint32(s[1])]
   689  	if c0 < 0xF0 { // 3-byte UTF-8
   690  		return t.lookupValue(uint32(i), s[2])
   691  	}
   692  	i = multiIndex[uint32(i)<<6+uint32(s[2])]
   693  	if c0 < 0xF8 { // 4-byte UTF-8
   694  		return t.lookupValue(uint32(i), s[3])
   695  	}
   696  	return 0
   697  }
   698  
// multiTrie. Total size: 18250 bytes (17.82 KiB). Checksum: a69a609d8696aa5e.
//
// A multiTrie is a view of one trie stored in the shared multiValues and
// multiIndex tables; newMultiTrie builds it from an entry of
// multiTrieHandles. The total size above is the sum of the sizes of
// multiValues (16384 bytes), multiIndex (1856 bytes) and multiTrieHandles
// (10 bytes).
//
// newMultiTrie fills the fields positionally, so their order must not change.
type multiTrie struct {
	ascii     []uint64 // index for ASCII bytes
	utf8Start []uint8  // index for UTF-8 bytes >= 0xC0
}
   704  
   705  func newMultiTrie(i int) *multiTrie {
   706  	h := multiTrieHandles[i]
   707  	return &multiTrie{multiValues[uint32(h.ascii)<<6:], multiIndex[uint32(h.multi)<<6:]}
   708  }
   709  
// multiTrieHandle locates one trie inside the shared multiValues and
// multiIndex tables. Both fields are block numbers: newMultiTrie shifts ascii
// left by 6 to get the trie's starting offset in multiValues and multi left by
// 6 to get its starting offset in multiIndex.
type multiTrieHandle struct {
	ascii, multi uint8
}
   713  
// multiTrieHandles: 5 handles, 10 bytes
//
// newMultiTrie(i) builds a trie from entry i. Each entry lists the ascii and
// multi block numbers of one trie, in that order. The trailing comment on each
// entry looks like a checksum followed by the trie's name (the two
// "ASCII only" entries are identical) -- presumably emitted by the generator;
// verify against it.
var multiTrieHandles = [5]multiTrieHandle{
	{0, 0},   // 8c1e77823143d35c: all
	{0, 23},  // 8fb58ff8243b45b0: ASCII only
	{0, 23},  // 8fb58ff8243b45b0: ASCII only 2
	{0, 24},  // 2ccc43994f11046f: BMP only
	{30, 25}, // ce448591bdcb4733: No BMP
}
   722  
   723  // lookupValue determines the type of block n and looks up the value for b.
   724  func (t *multiTrie) lookupValue(n uint32, b byte) uint64 {
   725  	switch {
   726  	default:
   727  		return uint64(multiValues[n<<6+uint32(b)])
   728  	}
   729  }
   730  
// multiValues: 32 blocks, 2048 entries, 16384 bytes
// The third block is the zero block.
//
// Each block holds 64 entries of 8 bytes, so block n starts at offset n<<6,
// which is how multiTrie.lookupValue addresses it. newMultiTrie also slices
// this table to give every trie its ascii view. Entries not listed below
// are 0.
var multiValues = [2048]uint64{
	// Block 0x0, offset 0x0
	0x03: 0x6e361699800b9fb8, 0x04: 0x52d3935a34f6f0b, 0x05: 0x2948319393e7ef10,
	0x07: 0x20f03b006704f663, 0x08: 0x6c15c0732bb2495f, 0x09: 0xe54e2c59d953551,
	0x0f: 0x33d8a825807d8037, 0x10: 0x6ecd93cb12168b92, 0x11: 0x6a81c9c0ce86e884,
	0x1f: 0xa03e77aac8be79b, 0x20: 0x28591d0e7e486efa, 0x21: 0x716fa3bc398dec8,
	0x3f: 0x4fd3bcfa72bce8b0,
	// Block 0x1, offset 0x40
	0x40: 0x3cbaef3db8ba5f12, 0x41: 0x2d262347c1f56357,
	0x7f: 0x782caa2d25a418a9,
	// Block 0x2, offset 0x80
	// Block 0x3, offset 0xc0
	0xc0: 0x6bbd1f937b1ff5d2, 0xc1: 0x732e23088d2eb8a4,
	// Block 0x4, offset 0x100
	0x13f: 0x56f8c4c82f5962dc,
	// Block 0x5, offset 0x140
	0x140: 0x57dc4544729a5da2, 0x141: 0x2f62f9cd307ffa0d,
	// Block 0x6, offset 0x180
	0x1bf: 0x7bf4d0ebf302a088,
	// Block 0x7, offset 0x1c0
	0x1c0: 0x1f0d67f249e59931, 0x1c1: 0x3011def73aa550c7,
	// Block 0x8, offset 0x200
	0x23f: 0x5de81c1dff6bf29d,
	// Block 0x9, offset 0x240
	0x240: 0x752c035737b825e8, 0x241: 0x1e793399081e3bb3,
	// Block 0xa, offset 0x280
	0x2bf: 0x6a28f01979cbf059,
	// Block 0xb, offset 0x2c0
	0x2c0: 0x373a4b0f2cbd4c74, 0x2c1: 0x4fd2c288683b767c,
	// Block 0xc, offset 0x300
	0x33f: 0x5a10ffa9e29184fb,
	// Block 0xd, offset 0x340
	0x340: 0x700f9bdb53fff6a5, 0x341: 0xcde93df0427eb79,
	// Block 0xe, offset 0x380
	0x3bf: 0x74071288fff39c76,
	// Block 0xf, offset 0x3c0
	0x3c0: 0x481fc2f510e5268a, 0x3c1: 0x7565c28164204849,
	// Block 0x10, offset 0x400
	0x43f: 0x5676a62fd49c6bec,
	// Block 0x11, offset 0x440
	0x440: 0x2f2d15776cbafc6b, 0x441: 0x4c55e8dc0ff11a3f,
	// Block 0x12, offset 0x480
	0x4bf: 0x69d6f0fe711fafc9,
	// Block 0x13, offset 0x4c0
	0x4c0: 0x33181de28cfb062d, 0x4c1: 0x2ef3adc6bb2f2d02,
	// Block 0x14, offset 0x500
	0x53f: 0xe03b31814c95f8b,
	// Block 0x15, offset 0x540
	0x540: 0x3bf6dc9a1c115603, 0x541: 0x6984ec9b7f51f7fc,
	// Block 0x16, offset 0x580
	0x5bf: 0x3c02ea92fb168559,
	// Block 0x17, offset 0x5c0
	0x5c0: 0x1badfe42e7629494, 0x5c1: 0x6dc4a554005f7645,
	// Block 0x18, offset 0x600
	0x63f: 0x3bb2ed2a72748f4b,
	// Block 0x19, offset 0x640
	0x640: 0x291354cd6767ec10, 0x641: 0x2c3a4715e3c070d6,
	// Block 0x1a, offset 0x680
	0x6bf: 0x352711cfb7236418,
	// Block 0x1b, offset 0x6c0
	0x6c0: 0x3a59d34fb8bceda, 0x6c1: 0x5e90d8ebedd64fa1,
	// Block 0x1c, offset 0x700
	0x73f: 0x7191a77b28d23110,
	// Block 0x1d, offset 0x740
	0x740: 0x4ca7f0c1623423d8, 0x741: 0x4f7156d996e2d0de,
	// Block 0x1e, offset 0x780
	// Block 0x1f, offset 0x7c0
}
   801  
// multiIndex: 29 blocks, 1856 entries, 1856 bytes
// Block 0 is the zero block.
//
// Each block holds 64 entries and every entry is a block number. newMultiTrie
// slices this table to give every trie its utf8Start view, which the lookup
// methods index with the lead byte of a multi-byte encoding. For each
// following byte c that is not the last one they index the whole table with
// n<<6+c, where n is the block number found in the previous step. Entries not
// listed below are 0.
var multiIndex = [1856]uint8{
	// Block 0x0, offset 0x0
	// Block 0x1, offset 0x40
	// Block 0x2, offset 0x80
	// Block 0x3, offset 0xc0
	0xc2: 0x01, 0xc3: 0x02, 0xc4: 0x03, 0xc7: 0x04,
	0xc8: 0x05, 0xcf: 0x06,
	0xd0: 0x07,
	0xdf: 0x08,
	0xe0: 0x02, 0xe1: 0x03, 0xe2: 0x04, 0xe3: 0x05, 0xe4: 0x06, 0xe7: 0x07,
	0xe8: 0x08, 0xef: 0x09,
	0xf0: 0x0e, 0xf1: 0x11, 0xf2: 0x13, 0xf3: 0x15, 0xf4: 0x17,
	// Block 0x4, offset 0x100
	0x120: 0x09,
	0x13f: 0x0a,
	// Block 0x5, offset 0x140
	0x140: 0x0b,
	0x17f: 0x0c,
	// Block 0x6, offset 0x180
	0x180: 0x0d,
	// Block 0x7, offset 0x1c0
	0x1ff: 0x0e,
	// Block 0x8, offset 0x200
	0x200: 0x0f,
	// Block 0x9, offset 0x240
	0x27f: 0x10,
	// Block 0xa, offset 0x280
	0x280: 0x11,
	// Block 0xb, offset 0x2c0
	0x2ff: 0x12,
	// Block 0xc, offset 0x300
	0x300: 0x13,
	// Block 0xd, offset 0x340
	0x37f: 0x14,
	// Block 0xe, offset 0x380
	0x380: 0x15,
	// Block 0xf, offset 0x3c0
	0x3ff: 0x16,
	// Block 0x10, offset 0x400
	0x410: 0x0a,
	0x41f: 0x0b,
	0x420: 0x0c,
	0x43f: 0x0d,
	// Block 0x11, offset 0x440
	0x440: 0x17,
	// Block 0x12, offset 0x480
	0x4bf: 0x18,
	// Block 0x13, offset 0x4c0
	0x4c0: 0x0f,
	0x4ff: 0x10,
	// Block 0x14, offset 0x500
	0x500: 0x19,
	// Block 0x15, offset 0x540
	0x540: 0x12,
	// Block 0x16, offset 0x580
	0x5bf: 0x1a,
	// Block 0x17, offset 0x5c0
	0x5ff: 0x14,
	// Block 0x18, offset 0x600
	0x600: 0x1b,
	// Block 0x19, offset 0x640
	0x640: 0x16,
	// Block 0x1a, offset 0x680
	// Block 0x1b, offset 0x6c0
	0x6c2: 0x01, 0x6c3: 0x02, 0x6c4: 0x03, 0x6c7: 0x04,
	0x6c8: 0x05, 0x6cf: 0x06,
	0x6d0: 0x07,
	0x6df: 0x08,
	0x6e0: 0x02, 0x6e1: 0x03, 0x6e2: 0x04, 0x6e3: 0x05, 0x6e4: 0x06, 0x6e7: 0x07,
	0x6e8: 0x08, 0x6ef: 0x09,
	// Block 0x1c, offset 0x700
	0x730: 0x0e, 0x731: 0x11, 0x732: 0x13, 0x733: 0x15, 0x734: 0x17,
}