github.com/go-xe2/third@v1.0.3/golang.org/x/text/internal/triegen/data_test.go (about) 1 // This file is generated with "go test -tags generate". DO NOT EDIT! 2 // +build !generate 3 4 package triegen_test 5 6 // lookup returns the trie value for the first UTF-8 encoding in s and 7 // the width in bytes of this encoding. The size will be 0 if s does not 8 // hold enough bytes to complete the encoding. len(s) must be greater than 0. 9 func (t *randTrie) lookup(s []byte) (v uint8, sz int) { 10 c0 := s[0] 11 switch { 12 case c0 < 0x80: // is ASCII 13 return randValues[c0], 1 14 case c0 < 0xC2: 15 return 0, 1 // Illegal UTF-8: not a starter, not ASCII. 16 case c0 < 0xE0: // 2-byte UTF-8 17 if len(s) < 2 { 18 return 0, 0 19 } 20 i := randIndex[c0] 21 c1 := s[1] 22 if c1 < 0x80 || 0xC0 <= c1 { 23 return 0, 1 // Illegal UTF-8: not a continuation byte. 24 } 25 return t.lookupValue(uint32(i), c1), 2 26 case c0 < 0xF0: // 3-byte UTF-8 27 if len(s) < 3 { 28 return 0, 0 29 } 30 i := randIndex[c0] 31 c1 := s[1] 32 if c1 < 0x80 || 0xC0 <= c1 { 33 return 0, 1 // Illegal UTF-8: not a continuation byte. 34 } 35 o := uint32(i)<<6 + uint32(c1) 36 i = randIndex[o] 37 c2 := s[2] 38 if c2 < 0x80 || 0xC0 <= c2 { 39 return 0, 2 // Illegal UTF-8: not a continuation byte. 40 } 41 return t.lookupValue(uint32(i), c2), 3 42 case c0 < 0xF8: // 4-byte UTF-8 43 if len(s) < 4 { 44 return 0, 0 45 } 46 i := randIndex[c0] 47 c1 := s[1] 48 if c1 < 0x80 || 0xC0 <= c1 { 49 return 0, 1 // Illegal UTF-8: not a continuation byte. 50 } 51 o := uint32(i)<<6 + uint32(c1) 52 i = randIndex[o] 53 c2 := s[2] 54 if c2 < 0x80 || 0xC0 <= c2 { 55 return 0, 2 // Illegal UTF-8: not a continuation byte. 56 } 57 o = uint32(i)<<6 + uint32(c2) 58 i = randIndex[o] 59 c3 := s[3] 60 if c3 < 0x80 || 0xC0 <= c3 { 61 return 0, 3 // Illegal UTF-8: not a continuation byte. 
62 } 63 return t.lookupValue(uint32(i), c3), 4 64 } 65 // Illegal rune 66 return 0, 1 67 } 68 69 // lookupUnsafe returns the trie value for the first UTF-8 encoding in s. 70 // s must start with a full and valid UTF-8 encoded rune. 71 func (t *randTrie) lookupUnsafe(s []byte) uint8 { 72 c0 := s[0] 73 if c0 < 0x80 { // is ASCII 74 return randValues[c0] 75 } 76 i := randIndex[c0] 77 if c0 < 0xE0 { // 2-byte UTF-8 78 return t.lookupValue(uint32(i), s[1]) 79 } 80 i = randIndex[uint32(i)<<6+uint32(s[1])] 81 if c0 < 0xF0 { // 3-byte UTF-8 82 return t.lookupValue(uint32(i), s[2]) 83 } 84 i = randIndex[uint32(i)<<6+uint32(s[2])] 85 if c0 < 0xF8 { // 4-byte UTF-8 86 return t.lookupValue(uint32(i), s[3]) 87 } 88 return 0 89 } 90 91 // lookupString returns the trie value for the first UTF-8 encoding in s and 92 // the width in bytes of this encoding. The size will be 0 if s does not 93 // hold enough bytes to complete the encoding. len(s) must be greater than 0. 94 func (t *randTrie) lookupString(s string) (v uint8, sz int) { 95 c0 := s[0] 96 switch { 97 case c0 < 0x80: // is ASCII 98 return randValues[c0], 1 99 case c0 < 0xC2: 100 return 0, 1 // Illegal UTF-8: not a starter, not ASCII. 101 case c0 < 0xE0: // 2-byte UTF-8 102 if len(s) < 2 { 103 return 0, 0 104 } 105 i := randIndex[c0] 106 c1 := s[1] 107 if c1 < 0x80 || 0xC0 <= c1 { 108 return 0, 1 // Illegal UTF-8: not a continuation byte. 109 } 110 return t.lookupValue(uint32(i), c1), 2 111 case c0 < 0xF0: // 3-byte UTF-8 112 if len(s) < 3 { 113 return 0, 0 114 } 115 i := randIndex[c0] 116 c1 := s[1] 117 if c1 < 0x80 || 0xC0 <= c1 { 118 return 0, 1 // Illegal UTF-8: not a continuation byte. 119 } 120 o := uint32(i)<<6 + uint32(c1) 121 i = randIndex[o] 122 c2 := s[2] 123 if c2 < 0x80 || 0xC0 <= c2 { 124 return 0, 2 // Illegal UTF-8: not a continuation byte. 
125 } 126 return t.lookupValue(uint32(i), c2), 3 127 case c0 < 0xF8: // 4-byte UTF-8 128 if len(s) < 4 { 129 return 0, 0 130 } 131 i := randIndex[c0] 132 c1 := s[1] 133 if c1 < 0x80 || 0xC0 <= c1 { 134 return 0, 1 // Illegal UTF-8: not a continuation byte. 135 } 136 o := uint32(i)<<6 + uint32(c1) 137 i = randIndex[o] 138 c2 := s[2] 139 if c2 < 0x80 || 0xC0 <= c2 { 140 return 0, 2 // Illegal UTF-8: not a continuation byte. 141 } 142 o = uint32(i)<<6 + uint32(c2) 143 i = randIndex[o] 144 c3 := s[3] 145 if c3 < 0x80 || 0xC0 <= c3 { 146 return 0, 3 // Illegal UTF-8: not a continuation byte. 147 } 148 return t.lookupValue(uint32(i), c3), 4 149 } 150 // Illegal rune 151 return 0, 1 152 } 153 154 // lookupStringUnsafe returns the trie value for the first UTF-8 encoding in s. 155 // s must start with a full and valid UTF-8 encoded rune. 156 func (t *randTrie) lookupStringUnsafe(s string) uint8 { 157 c0 := s[0] 158 if c0 < 0x80 { // is ASCII 159 return randValues[c0] 160 } 161 i := randIndex[c0] 162 if c0 < 0xE0 { // 2-byte UTF-8 163 return t.lookupValue(uint32(i), s[1]) 164 } 165 i = randIndex[uint32(i)<<6+uint32(s[1])] 166 if c0 < 0xF0 { // 3-byte UTF-8 167 return t.lookupValue(uint32(i), s[2]) 168 } 169 i = randIndex[uint32(i)<<6+uint32(s[2])] 170 if c0 < 0xF8 { // 4-byte UTF-8 171 return t.lookupValue(uint32(i), s[3]) 172 } 173 return 0 174 } 175 176 // randTrie. Total size: 9280 bytes (9.06 KiB). Checksum: 6debd324a8debb8f. 177 type randTrie struct{} 178 179 func newRandTrie(i int) *randTrie { 180 return &randTrie{} 181 } 182 183 // lookupValue determines the type of block n and looks up the value for b. 184 func (t *randTrie) lookupValue(n uint32, b byte) uint8 { 185 switch { 186 default: 187 return uint8(randValues[n<<6+uint32(b)]) 188 } 189 } 190 191 // randValues: 56 blocks, 3584 entries, 3584 bytes 192 // The third block is the zero block. 
193 var randValues = [3584]uint8{ 194 // Block 0x0, offset 0x0 195 // Block 0x1, offset 0x40 196 // Block 0x2, offset 0x80 197 // Block 0x3, offset 0xc0 198 0xc9: 0x0001, 199 // Block 0x4, offset 0x100 200 0x100: 0x0001, 201 // Block 0x5, offset 0x140 202 0x155: 0x0001, 203 // Block 0x6, offset 0x180 204 0x196: 0x0001, 205 // Block 0x7, offset 0x1c0 206 0x1ef: 0x0001, 207 // Block 0x8, offset 0x200 208 0x206: 0x0001, 209 // Block 0x9, offset 0x240 210 0x258: 0x0001, 211 // Block 0xa, offset 0x280 212 0x288: 0x0001, 213 // Block 0xb, offset 0x2c0 214 0x2f2: 0x0001, 215 // Block 0xc, offset 0x300 216 0x304: 0x0001, 217 // Block 0xd, offset 0x340 218 0x34b: 0x0001, 219 // Block 0xe, offset 0x380 220 0x3ba: 0x0001, 221 // Block 0xf, offset 0x3c0 222 0x3f5: 0x0001, 223 // Block 0x10, offset 0x400 224 0x41d: 0x0001, 225 // Block 0x11, offset 0x440 226 0x442: 0x0001, 227 // Block 0x12, offset 0x480 228 0x4bb: 0x0001, 229 // Block 0x13, offset 0x4c0 230 0x4e9: 0x0001, 231 // Block 0x14, offset 0x500 232 0x53e: 0x0001, 233 // Block 0x15, offset 0x540 234 0x55f: 0x0001, 235 // Block 0x16, offset 0x580 236 0x5b7: 0x0001, 237 // Block 0x17, offset 0x5c0 238 0x5d9: 0x0001, 239 // Block 0x18, offset 0x600 240 0x60e: 0x0001, 241 // Block 0x19, offset 0x640 242 0x652: 0x0001, 243 // Block 0x1a, offset 0x680 244 0x68f: 0x0001, 245 // Block 0x1b, offset 0x6c0 246 0x6dc: 0x0001, 247 // Block 0x1c, offset 0x700 248 0x703: 0x0001, 249 // Block 0x1d, offset 0x740 250 0x741: 0x0001, 251 // Block 0x1e, offset 0x780 252 0x79b: 0x0001, 253 // Block 0x1f, offset 0x7c0 254 0x7f1: 0x0001, 255 // Block 0x20, offset 0x800 256 0x833: 0x0001, 257 // Block 0x21, offset 0x840 258 0x853: 0x0001, 259 // Block 0x22, offset 0x880 260 0x8a2: 0x0001, 261 // Block 0x23, offset 0x8c0 262 0x8f8: 0x0001, 263 // Block 0x24, offset 0x900 264 0x917: 0x0001, 265 // Block 0x25, offset 0x940 266 0x945: 0x0001, 267 // Block 0x26, offset 0x980 268 0x99e: 0x0001, 269 // Block 0x27, offset 0x9c0 270 0x9fd: 0x0001, 271 
// Block 0x28, offset 0xa00 272 0xa0d: 0x0001, 273 // Block 0x29, offset 0xa40 274 0xa66: 0x0001, 275 // Block 0x2a, offset 0xa80 276 0xaab: 0x0001, 277 // Block 0x2b, offset 0xac0 278 0xaea: 0x0001, 279 // Block 0x2c, offset 0xb00 280 0xb2d: 0x0001, 281 // Block 0x2d, offset 0xb40 282 0xb54: 0x0001, 283 // Block 0x2e, offset 0xb80 284 0xb90: 0x0001, 285 // Block 0x2f, offset 0xbc0 286 0xbe5: 0x0001, 287 // Block 0x30, offset 0xc00 288 0xc28: 0x0001, 289 // Block 0x31, offset 0xc40 290 0xc7c: 0x0001, 291 // Block 0x32, offset 0xc80 292 0xcbf: 0x0001, 293 // Block 0x33, offset 0xcc0 294 0xcc7: 0x0001, 295 // Block 0x34, offset 0xd00 296 0xd34: 0x0001, 297 // Block 0x35, offset 0xd40 298 0xd61: 0x0001, 299 // Block 0x36, offset 0xd80 300 0xdb9: 0x0001, 301 // Block 0x37, offset 0xdc0 302 0xdda: 0x0001, 303 } 304 305 // randIndex: 89 blocks, 5696 entries, 5696 bytes 306 // Block 0 is the zero block. 307 var randIndex = [5696]uint8{ 308 // Block 0x0, offset 0x0 309 // Block 0x1, offset 0x40 310 // Block 0x2, offset 0x80 311 // Block 0x3, offset 0xc0 312 0xe1: 0x02, 0xe3: 0x03, 0xe4: 0x04, 313 0xea: 0x05, 0xeb: 0x06, 0xec: 0x07, 314 0xf0: 0x10, 0xf1: 0x24, 0xf2: 0x3d, 0xf3: 0x4f, 0xf4: 0x56, 315 // Block 0x4, offset 0x100 316 0x107: 0x01, 317 // Block 0x5, offset 0x140 318 0x16c: 0x02, 319 // Block 0x6, offset 0x180 320 0x19c: 0x03, 321 0x1ae: 0x04, 322 // Block 0x7, offset 0x1c0 323 0x1d8: 0x05, 324 0x1f7: 0x06, 325 // Block 0x8, offset 0x200 326 0x20c: 0x07, 327 // Block 0x9, offset 0x240 328 0x24a: 0x08, 329 // Block 0xa, offset 0x280 330 0x2b6: 0x09, 331 // Block 0xb, offset 0x2c0 332 0x2d5: 0x0a, 333 // Block 0xc, offset 0x300 334 0x31a: 0x0b, 335 // Block 0xd, offset 0x340 336 0x373: 0x0c, 337 // Block 0xe, offset 0x380 338 0x38b: 0x0d, 339 // Block 0xf, offset 0x3c0 340 0x3f0: 0x0e, 341 // Block 0x10, offset 0x400 342 0x433: 0x0f, 343 // Block 0x11, offset 0x440 344 0x45d: 0x10, 345 // Block 0x12, offset 0x480 346 0x491: 0x08, 0x494: 0x09, 0x497: 0x0a, 347 0x49b: 
0x0b, 0x49c: 0x0c, 348 0x4a1: 0x0d, 349 0x4ad: 0x0e, 350 0x4ba: 0x0f, 351 // Block 0x13, offset 0x4c0 352 0x4c1: 0x11, 353 // Block 0x14, offset 0x500 354 0x531: 0x12, 355 // Block 0x15, offset 0x540 356 0x546: 0x13, 357 // Block 0x16, offset 0x580 358 0x5ab: 0x14, 359 // Block 0x17, offset 0x5c0 360 0x5d4: 0x11, 361 0x5fe: 0x11, 362 // Block 0x18, offset 0x600 363 0x618: 0x0a, 364 // Block 0x19, offset 0x640 365 0x65b: 0x15, 366 // Block 0x1a, offset 0x680 367 0x6a0: 0x16, 368 // Block 0x1b, offset 0x6c0 369 0x6d2: 0x17, 370 0x6f6: 0x18, 371 // Block 0x1c, offset 0x700 372 0x711: 0x19, 373 // Block 0x1d, offset 0x740 374 0x768: 0x1a, 375 // Block 0x1e, offset 0x780 376 0x783: 0x1b, 377 // Block 0x1f, offset 0x7c0 378 0x7f9: 0x1c, 379 // Block 0x20, offset 0x800 380 0x831: 0x1d, 381 // Block 0x21, offset 0x840 382 0x85e: 0x1e, 383 // Block 0x22, offset 0x880 384 0x898: 0x1f, 385 // Block 0x23, offset 0x8c0 386 0x8c7: 0x18, 387 0x8d5: 0x14, 388 0x8f7: 0x20, 389 0x8fe: 0x1f, 390 // Block 0x24, offset 0x900 391 0x905: 0x21, 392 // Block 0x25, offset 0x940 393 0x966: 0x03, 394 // Block 0x26, offset 0x980 395 0x981: 0x07, 0x983: 0x11, 396 0x989: 0x12, 0x98a: 0x13, 0x98e: 0x14, 0x98f: 0x15, 397 0x992: 0x16, 0x995: 0x17, 0x996: 0x18, 398 0x998: 0x19, 0x999: 0x1a, 0x99b: 0x1b, 0x99f: 0x1c, 399 0x9a3: 0x1d, 400 0x9ad: 0x1e, 0x9af: 0x1f, 401 0x9b0: 0x20, 0x9b1: 0x21, 402 0x9b8: 0x22, 0x9bd: 0x23, 403 // Block 0x27, offset 0x9c0 404 0x9cd: 0x22, 405 // Block 0x28, offset 0xa00 406 0xa0c: 0x08, 407 // Block 0x29, offset 0xa40 408 0xa6f: 0x1c, 409 // Block 0x2a, offset 0xa80 410 0xa90: 0x1a, 411 0xaaf: 0x23, 412 // Block 0x2b, offset 0xac0 413 0xae3: 0x19, 414 0xae8: 0x24, 415 0xafc: 0x25, 416 // Block 0x2c, offset 0xb00 417 0xb13: 0x26, 418 // Block 0x2d, offset 0xb40 419 0xb67: 0x1c, 420 // Block 0x2e, offset 0xb80 421 0xb8f: 0x0b, 422 // Block 0x2f, offset 0xbc0 423 0xbcb: 0x27, 424 0xbe7: 0x26, 425 // Block 0x30, offset 0xc00 426 0xc34: 0x16, 427 // Block 0x31, offset 0xc40 
428 0xc62: 0x03, 429 // Block 0x32, offset 0xc80 430 0xcbb: 0x12, 431 // Block 0x33, offset 0xcc0 432 0xcdf: 0x09, 433 // Block 0x34, offset 0xd00 434 0xd34: 0x0a, 435 // Block 0x35, offset 0xd40 436 0xd41: 0x1e, 437 // Block 0x36, offset 0xd80 438 0xd83: 0x28, 439 // Block 0x37, offset 0xdc0 440 0xdc0: 0x15, 441 // Block 0x38, offset 0xe00 442 0xe1a: 0x15, 443 // Block 0x39, offset 0xe40 444 0xe65: 0x29, 445 // Block 0x3a, offset 0xe80 446 0xe86: 0x1f, 447 // Block 0x3b, offset 0xec0 448 0xeec: 0x18, 449 // Block 0x3c, offset 0xf00 450 0xf28: 0x2a, 451 // Block 0x3d, offset 0xf40 452 0xf53: 0x08, 453 // Block 0x3e, offset 0xf80 454 0xfa2: 0x2b, 455 0xfaa: 0x17, 456 // Block 0x3f, offset 0xfc0 457 0xfc0: 0x25, 0xfc2: 0x26, 458 0xfc9: 0x27, 0xfcd: 0x28, 0xfce: 0x29, 459 0xfd5: 0x2a, 460 0xfd8: 0x2b, 0xfd9: 0x2c, 0xfdf: 0x2d, 461 0xfe1: 0x2e, 0xfe2: 0x2f, 0xfe3: 0x30, 0xfe6: 0x31, 462 0xfe9: 0x32, 0xfec: 0x33, 0xfed: 0x34, 0xfef: 0x35, 463 0xff1: 0x36, 0xff2: 0x37, 0xff3: 0x38, 0xff4: 0x39, 464 0xffa: 0x3a, 0xffc: 0x3b, 0xffe: 0x3c, 465 // Block 0x40, offset 0x1000 466 0x102c: 0x2c, 467 // Block 0x41, offset 0x1040 468 0x1074: 0x2c, 469 // Block 0x42, offset 0x1080 470 0x108c: 0x08, 471 0x10a0: 0x2d, 472 // Block 0x43, offset 0x10c0 473 0x10e8: 0x10, 474 // Block 0x44, offset 0x1100 475 0x110f: 0x13, 476 // Block 0x45, offset 0x1140 477 0x114b: 0x2e, 478 // Block 0x46, offset 0x1180 479 0x118b: 0x23, 480 0x119d: 0x0c, 481 // Block 0x47, offset 0x11c0 482 0x11c3: 0x12, 483 0x11f9: 0x0f, 484 // Block 0x48, offset 0x1200 485 0x121e: 0x1b, 486 // Block 0x49, offset 0x1240 487 0x1270: 0x2f, 488 // Block 0x4a, offset 0x1280 489 0x128a: 0x1b, 490 0x12a7: 0x02, 491 // Block 0x4b, offset 0x12c0 492 0x12fb: 0x14, 493 // Block 0x4c, offset 0x1300 494 0x1333: 0x30, 495 // Block 0x4d, offset 0x1340 496 0x134d: 0x31, 497 // Block 0x4e, offset 0x1380 498 0x138e: 0x15, 499 // Block 0x4f, offset 0x13c0 500 0x13f4: 0x32, 501 // Block 0x50, offset 0x1400 502 0x141b: 0x33, 503 // Block 
0x51, offset 0x1440 504 0x1448: 0x3e, 0x1449: 0x3f, 0x144a: 0x40, 0x144f: 0x41, 505 0x1459: 0x42, 0x145c: 0x43, 0x145e: 0x44, 0x145f: 0x45, 506 0x1468: 0x46, 0x1469: 0x47, 0x146c: 0x48, 0x146d: 0x49, 0x146e: 0x4a, 507 0x1472: 0x4b, 0x1473: 0x4c, 508 0x1479: 0x4d, 0x147b: 0x4e, 509 // Block 0x52, offset 0x1480 510 0x1480: 0x34, 511 0x1499: 0x11, 512 0x14b6: 0x2c, 513 // Block 0x53, offset 0x14c0 514 0x14e4: 0x0d, 515 // Block 0x54, offset 0x1500 516 0x1527: 0x08, 517 // Block 0x55, offset 0x1540 518 0x1555: 0x2b, 519 // Block 0x56, offset 0x1580 520 0x15b2: 0x35, 521 // Block 0x57, offset 0x15c0 522 0x15f2: 0x1c, 0x15f4: 0x29, 523 // Block 0x58, offset 0x1600 524 0x1600: 0x50, 0x1603: 0x51, 525 0x1608: 0x52, 0x160a: 0x53, 0x160d: 0x54, 0x160e: 0x55, 526 } 527 528 // lookup returns the trie value for the first UTF-8 encoding in s and 529 // the width in bytes of this encoding. The size will be 0 if s does not 530 // hold enough bytes to complete the encoding. len(s) must be greater than 0. 531 func (t *multiTrie) lookup(s []byte) (v uint64, sz int) { 532 c0 := s[0] 533 switch { 534 case c0 < 0x80: // is ASCII 535 return t.ascii[c0], 1 536 case c0 < 0xC2: 537 return 0, 1 // Illegal UTF-8: not a starter, not ASCII. 538 case c0 < 0xE0: // 2-byte UTF-8 539 if len(s) < 2 { 540 return 0, 0 541 } 542 i := t.utf8Start[c0] 543 c1 := s[1] 544 if c1 < 0x80 || 0xC0 <= c1 { 545 return 0, 1 // Illegal UTF-8: not a continuation byte. 546 } 547 return t.lookupValue(uint32(i), c1), 2 548 case c0 < 0xF0: // 3-byte UTF-8 549 if len(s) < 3 { 550 return 0, 0 551 } 552 i := t.utf8Start[c0] 553 c1 := s[1] 554 if c1 < 0x80 || 0xC0 <= c1 { 555 return 0, 1 // Illegal UTF-8: not a continuation byte. 556 } 557 o := uint32(i)<<6 + uint32(c1) 558 i = multiIndex[o] 559 c2 := s[2] 560 if c2 < 0x80 || 0xC0 <= c2 { 561 return 0, 2 // Illegal UTF-8: not a continuation byte. 
562 } 563 return t.lookupValue(uint32(i), c2), 3 564 case c0 < 0xF8: // 4-byte UTF-8 565 if len(s) < 4 { 566 return 0, 0 567 } 568 i := t.utf8Start[c0] 569 c1 := s[1] 570 if c1 < 0x80 || 0xC0 <= c1 { 571 return 0, 1 // Illegal UTF-8: not a continuation byte. 572 } 573 o := uint32(i)<<6 + uint32(c1) 574 i = multiIndex[o] 575 c2 := s[2] 576 if c2 < 0x80 || 0xC0 <= c2 { 577 return 0, 2 // Illegal UTF-8: not a continuation byte. 578 } 579 o = uint32(i)<<6 + uint32(c2) 580 i = multiIndex[o] 581 c3 := s[3] 582 if c3 < 0x80 || 0xC0 <= c3 { 583 return 0, 3 // Illegal UTF-8: not a continuation byte. 584 } 585 return t.lookupValue(uint32(i), c3), 4 586 } 587 // Illegal rune 588 return 0, 1 589 } 590 591 // lookupUnsafe returns the trie value for the first UTF-8 encoding in s. 592 // s must start with a full and valid UTF-8 encoded rune. 593 func (t *multiTrie) lookupUnsafe(s []byte) uint64 { 594 c0 := s[0] 595 if c0 < 0x80 { // is ASCII 596 return t.ascii[c0] 597 } 598 i := t.utf8Start[c0] 599 if c0 < 0xE0 { // 2-byte UTF-8 600 return t.lookupValue(uint32(i), s[1]) 601 } 602 i = multiIndex[uint32(i)<<6+uint32(s[1])] 603 if c0 < 0xF0 { // 3-byte UTF-8 604 return t.lookupValue(uint32(i), s[2]) 605 } 606 i = multiIndex[uint32(i)<<6+uint32(s[2])] 607 if c0 < 0xF8 { // 4-byte UTF-8 608 return t.lookupValue(uint32(i), s[3]) 609 } 610 return 0 611 } 612 613 // lookupString returns the trie value for the first UTF-8 encoding in s and 614 // the width in bytes of this encoding. The size will be 0 if s does not 615 // hold enough bytes to complete the encoding. len(s) must be greater than 0. 616 func (t *multiTrie) lookupString(s string) (v uint64, sz int) { 617 c0 := s[0] 618 switch { 619 case c0 < 0x80: // is ASCII 620 return t.ascii[c0], 1 621 case c0 < 0xC2: 622 return 0, 1 // Illegal UTF-8: not a starter, not ASCII. 
623 case c0 < 0xE0: // 2-byte UTF-8 624 if len(s) < 2 { 625 return 0, 0 626 } 627 i := t.utf8Start[c0] 628 c1 := s[1] 629 if c1 < 0x80 || 0xC0 <= c1 { 630 return 0, 1 // Illegal UTF-8: not a continuation byte. 631 } 632 return t.lookupValue(uint32(i), c1), 2 633 case c0 < 0xF0: // 3-byte UTF-8 634 if len(s) < 3 { 635 return 0, 0 636 } 637 i := t.utf8Start[c0] 638 c1 := s[1] 639 if c1 < 0x80 || 0xC0 <= c1 { 640 return 0, 1 // Illegal UTF-8: not a continuation byte. 641 } 642 o := uint32(i)<<6 + uint32(c1) 643 i = multiIndex[o] 644 c2 := s[2] 645 if c2 < 0x80 || 0xC0 <= c2 { 646 return 0, 2 // Illegal UTF-8: not a continuation byte. 647 } 648 return t.lookupValue(uint32(i), c2), 3 649 case c0 < 0xF8: // 4-byte UTF-8 650 if len(s) < 4 { 651 return 0, 0 652 } 653 i := t.utf8Start[c0] 654 c1 := s[1] 655 if c1 < 0x80 || 0xC0 <= c1 { 656 return 0, 1 // Illegal UTF-8: not a continuation byte. 657 } 658 o := uint32(i)<<6 + uint32(c1) 659 i = multiIndex[o] 660 c2 := s[2] 661 if c2 < 0x80 || 0xC0 <= c2 { 662 return 0, 2 // Illegal UTF-8: not a continuation byte. 663 } 664 o = uint32(i)<<6 + uint32(c2) 665 i = multiIndex[o] 666 c3 := s[3] 667 if c3 < 0x80 || 0xC0 <= c3 { 668 return 0, 3 // Illegal UTF-8: not a continuation byte. 669 } 670 return t.lookupValue(uint32(i), c3), 4 671 } 672 // Illegal rune 673 return 0, 1 674 } 675 676 // lookupStringUnsafe returns the trie value for the first UTF-8 encoding in s. 677 // s must start with a full and valid UTF-8 encoded rune. 
678 func (t *multiTrie) lookupStringUnsafe(s string) uint64 { 679 c0 := s[0] 680 if c0 < 0x80 { // is ASCII 681 return t.ascii[c0] 682 } 683 i := t.utf8Start[c0] 684 if c0 < 0xE0 { // 2-byte UTF-8 685 return t.lookupValue(uint32(i), s[1]) 686 } 687 i = multiIndex[uint32(i)<<6+uint32(s[1])] 688 if c0 < 0xF0 { // 3-byte UTF-8 689 return t.lookupValue(uint32(i), s[2]) 690 } 691 i = multiIndex[uint32(i)<<6+uint32(s[2])] 692 if c0 < 0xF8 { // 4-byte UTF-8 693 return t.lookupValue(uint32(i), s[3]) 694 } 695 return 0 696 } 697 698 // multiTrie. Total size: 18250 bytes (17.82 KiB). Checksum: a69a609d8696aa5e. 699 type multiTrie struct { 700 ascii []uint64 // index for ASCII bytes 701 utf8Start []uint8 // index for UTF-8 bytes >= 0xC0 702 } 703 704 func newMultiTrie(i int) *multiTrie { 705 h := multiTrieHandles[i] 706 return &multiTrie{multiValues[uint32(h.ascii)<<6:], multiIndex[uint32(h.multi)<<6:]} 707 } 708 709 type multiTrieHandle struct { 710 ascii, multi uint8 711 } 712 713 // multiTrieHandles: 5 handles, 10 bytes 714 var multiTrieHandles = [5]multiTrieHandle{ 715 {0, 0}, // 8c1e77823143d35c: all 716 {0, 23}, // 8fb58ff8243b45b0: ASCII only 717 {0, 23}, // 8fb58ff8243b45b0: ASCII only 2 718 {0, 24}, // 2ccc43994f11046f: BMP only 719 {30, 25}, // ce448591bdcb4733: No BMP 720 } 721 722 // lookupValue determines the type of block n and looks up the value for b. 723 func (t *multiTrie) lookupValue(n uint32, b byte) uint64 { 724 switch { 725 default: 726 return uint64(multiValues[n<<6+uint32(b)]) 727 } 728 } 729 730 // multiValues: 32 blocks, 2048 entries, 16384 bytes 731 // The third block is the zero block. 
var multiValues = [2048]uint64{
	// Keys are absolute offsets into the table. Every block covers 0x40 (64)
	// consecutive entries, and any offset that is not listed holds zero.
	// Block 0x0, offset 0x0
	0x03: 0x6e361699800b9fb8, 0x04: 0x52d3935a34f6f0b, 0x05: 0x2948319393e7ef10,
	0x07: 0x20f03b006704f663, 0x08: 0x6c15c0732bb2495f, 0x09: 0xe54e2c59d953551,
	0x0f: 0x33d8a825807d8037, 0x10: 0x6ecd93cb12168b92, 0x11: 0x6a81c9c0ce86e884,
	0x1f: 0xa03e77aac8be79b, 0x20: 0x28591d0e7e486efa, 0x21: 0x716fa3bc398dec8,
	0x3f: 0x4fd3bcfa72bce8b0,
	// Block 0x1, offset 0x40
	0x40: 0x3cbaef3db8ba5f12, 0x41: 0x2d262347c1f56357,
	0x7f: 0x782caa2d25a418a9,
	// Block 0x2, offset 0x80
	// Block 0x3, offset 0xc0
	0xc0: 0x6bbd1f937b1ff5d2, 0xc1: 0x732e23088d2eb8a4,
	// Block 0x4, offset 0x100
	0x13f: 0x56f8c4c82f5962dc,
	// Block 0x5, offset 0x140
	0x140: 0x57dc4544729a5da2, 0x141: 0x2f62f9cd307ffa0d,
	// Block 0x6, offset 0x180
	0x1bf: 0x7bf4d0ebf302a088,
	// Block 0x7, offset 0x1c0
	0x1c0: 0x1f0d67f249e59931, 0x1c1: 0x3011def73aa550c7,
	// Block 0x8, offset 0x200
	0x23f: 0x5de81c1dff6bf29d,
	// Block 0x9, offset 0x240
	0x240: 0x752c035737b825e8, 0x241: 0x1e793399081e3bb3,
	// Block 0xa, offset 0x280
	0x2bf: 0x6a28f01979cbf059,
	// Block 0xb, offset 0x2c0
	0x2c0: 0x373a4b0f2cbd4c74, 0x2c1: 0x4fd2c288683b767c,
	// Block 0xc, offset 0x300
	0x33f: 0x5a10ffa9e29184fb,
	// Block 0xd, offset 0x340
	0x340: 0x700f9bdb53fff6a5, 0x341: 0xcde93df0427eb79,
	// Block 0xe, offset 0x380
	0x3bf: 0x74071288fff39c76,
	// Block 0xf, offset 0x3c0
	0x3c0: 0x481fc2f510e5268a, 0x3c1: 0x7565c28164204849,
	// Block 0x10, offset 0x400
	0x43f: 0x5676a62fd49c6bec,
	// Block 0x11, offset 0x440
	0x440: 0x2f2d15776cbafc6b, 0x441: 0x4c55e8dc0ff11a3f,
	// Block 0x12, offset 0x480
	0x4bf: 0x69d6f0fe711fafc9,
	// Block 0x13, offset 0x4c0
	0x4c0: 0x33181de28cfb062d, 0x4c1: 0x2ef3adc6bb2f2d02,
	// Block 0x14, offset 0x500
	0x53f: 0xe03b31814c95f8b,
	// Block 0x15, offset 0x540
	0x540: 0x3bf6dc9a1c115603, 0x541: 0x6984ec9b7f51f7fc,
	// Block 0x16, offset 0x580
	0x5bf: 0x3c02ea92fb168559,
	// Block 0x17, offset 0x5c0
	0x5c0: 0x1badfe42e7629494, 0x5c1: 0x6dc4a554005f7645,
	// Block 0x18, offset 0x600
	0x63f: 0x3bb2ed2a72748f4b,
	// Block 0x19, offset 0x640
	0x640: 0x291354cd6767ec10, 0x641: 0x2c3a4715e3c070d6,
	// Block 0x1a, offset 0x680
	0x6bf: 0x352711cfb7236418,
	// Block 0x1b, offset 0x6c0
	0x6c0: 0x3a59d34fb8bceda, 0x6c1: 0x5e90d8ebedd64fa1,
	// Block 0x1c, offset 0x700
	0x73f: 0x7191a77b28d23110,
	// Block 0x1d, offset 0x740
	0x740: 0x4ca7f0c1623423d8, 0x741: 0x4f7156d996e2d0de,
	// Block 0x1e, offset 0x780
	// Block 0x1f, offset 0x7c0
}

// multiIndex: 29 blocks, 1856 entries, 1856 bytes
// Block 0 is the zero block.
//
// Each entry is a block number. The multiTrie lookup methods shift it left by
// 6 bits and add the next input byte to form an offset into multiIndex (for
// intermediate bytes) or multiValues (for the final byte). newMultiTrie
// slices this table at a per-handle block offset to obtain each trie's
// lead-byte index. Keys are absolute offsets; unlisted offsets hold zero.
var multiIndex = [1856]uint8{
	// Block 0x0, offset 0x0
	// Block 0x1, offset 0x40
	// Block 0x2, offset 0x80
	// Block 0x3, offset 0xc0
	0xc2: 0x01, 0xc3: 0x02, 0xc4: 0x03, 0xc7: 0x04,
	0xc8: 0x05, 0xcf: 0x06,
	0xd0: 0x07,
	0xdf: 0x08,
	0xe0: 0x02, 0xe1: 0x03, 0xe2: 0x04, 0xe3: 0x05, 0xe4: 0x06, 0xe7: 0x07,
	0xe8: 0x08, 0xef: 0x09,
	0xf0: 0x0e, 0xf1: 0x11, 0xf2: 0x13, 0xf3: 0x15, 0xf4: 0x17,
	// Block 0x4, offset 0x100
	0x120: 0x09,
	0x13f: 0x0a,
	// Block 0x5, offset 0x140
	0x140: 0x0b,
	0x17f: 0x0c,
	// Block 0x6, offset 0x180
	0x180: 0x0d,
	// Block 0x7, offset 0x1c0
	0x1ff: 0x0e,
	// Block 0x8, offset 0x200
	0x200: 0x0f,
	// Block 0x9, offset 0x240
	0x27f: 0x10,
	// Block 0xa, offset 0x280
	0x280: 0x11,
	// Block 0xb, offset 0x2c0
	0x2ff: 0x12,
	// Block 0xc, offset 0x300
	0x300: 0x13,
	// Block 0xd, offset 0x340
	0x37f: 0x14,
	// Block 0xe, offset 0x380
	0x380: 0x15,
	// Block 0xf, offset 0x3c0
	0x3ff: 0x16,
	// Block 0x10, offset 0x400
	0x410: 0x0a,
	0x41f: 0x0b,
	0x420: 0x0c,
	0x43f: 0x0d,
	// Block 0x11, offset 0x440
	0x440: 0x17,
	// Block 0x12, offset 0x480
	0x4bf: 0x18,
	// Block 0x13, offset 0x4c0
	0x4c0: 0x0f,
	0x4ff: 0x10,
	// Block 0x14, offset 0x500
	0x500: 0x19,
	// Block 0x15, offset 0x540
	0x540: 0x12,
	// Block 0x16, offset 0x580
	0x5bf: 0x1a,
	// Block 0x17, offset 0x5c0
	0x5ff: 0x14,
	// Block 0x18, offset 0x600
	0x600: 0x1b,
	// Block 0x19, offset 0x640
	0x640: 0x16,
	// Block 0x1a, offset 0x680
	// Block 0x1b, offset 0x6c0
	0x6c2: 0x01, 0x6c3: 0x02, 0x6c4: 0x03, 0x6c7: 0x04,
	0x6c8: 0x05, 0x6cf: 0x06,
	0x6d0: 0x07,
	0x6df: 0x08,
	0x6e0: 0x02, 0x6e1: 0x03, 0x6e2: 0x04, 0x6e3: 0x05, 0x6e4: 0x06, 0x6e7: 0x07,
	0x6e8: 0x08, 0x6ef: 0x09,
	// Block 0x1c, offset 0x700
	0x730: 0x0e, 0x731: 0x11, 0x732: 0x13, 0x733: 0x15, 0x734: 0x17,
}