github.com/go-enjin/golang-org-x-text@v0.12.1-enjin.2/internal/triegen/data_test.go (about) 1 // This file is generated with "go test -tags generate". DO NOT EDIT! 2 //go:build !generate 3 // +build !generate 4 5 package triegen_test 6 7 // lookup returns the trie value for the first UTF-8 encoding in s and 8 // the width in bytes of this encoding. The size will be 0 if s does not 9 // hold enough bytes to complete the encoding. len(s) must be greater than 0. 10 func (t *randTrie) lookup(s []byte) (v uint8, sz int) { 11 c0 := s[0] 12 switch { 13 case c0 < 0x80: // is ASCII 14 return randValues[c0], 1 15 case c0 < 0xC2: 16 return 0, 1 // Illegal UTF-8: not a starter, not ASCII. 17 case c0 < 0xE0: // 2-byte UTF-8 18 if len(s) < 2 { 19 return 0, 0 20 } 21 i := randIndex[c0] 22 c1 := s[1] 23 if c1 < 0x80 || 0xC0 <= c1 { 24 return 0, 1 // Illegal UTF-8: not a continuation byte. 25 } 26 return t.lookupValue(uint32(i), c1), 2 27 case c0 < 0xF0: // 3-byte UTF-8 28 if len(s) < 3 { 29 return 0, 0 30 } 31 i := randIndex[c0] 32 c1 := s[1] 33 if c1 < 0x80 || 0xC0 <= c1 { 34 return 0, 1 // Illegal UTF-8: not a continuation byte. 35 } 36 o := uint32(i)<<6 + uint32(c1) 37 i = randIndex[o] 38 c2 := s[2] 39 if c2 < 0x80 || 0xC0 <= c2 { 40 return 0, 2 // Illegal UTF-8: not a continuation byte. 41 } 42 return t.lookupValue(uint32(i), c2), 3 43 case c0 < 0xF8: // 4-byte UTF-8 44 if len(s) < 4 { 45 return 0, 0 46 } 47 i := randIndex[c0] 48 c1 := s[1] 49 if c1 < 0x80 || 0xC0 <= c1 { 50 return 0, 1 // Illegal UTF-8: not a continuation byte. 51 } 52 o := uint32(i)<<6 + uint32(c1) 53 i = randIndex[o] 54 c2 := s[2] 55 if c2 < 0x80 || 0xC0 <= c2 { 56 return 0, 2 // Illegal UTF-8: not a continuation byte. 57 } 58 o = uint32(i)<<6 + uint32(c2) 59 i = randIndex[o] 60 c3 := s[3] 61 if c3 < 0x80 || 0xC0 <= c3 { 62 return 0, 3 // Illegal UTF-8: not a continuation byte. 
63 } 64 return t.lookupValue(uint32(i), c3), 4 65 } 66 // Illegal rune 67 return 0, 1 68 } 69 70 // lookupUnsafe returns the trie value for the first UTF-8 encoding in s. 71 // s must start with a full and valid UTF-8 encoded rune. 72 func (t *randTrie) lookupUnsafe(s []byte) uint8 { 73 c0 := s[0] 74 if c0 < 0x80 { // is ASCII 75 return randValues[c0] 76 } 77 i := randIndex[c0] 78 if c0 < 0xE0 { // 2-byte UTF-8 79 return t.lookupValue(uint32(i), s[1]) 80 } 81 i = randIndex[uint32(i)<<6+uint32(s[1])] 82 if c0 < 0xF0 { // 3-byte UTF-8 83 return t.lookupValue(uint32(i), s[2]) 84 } 85 i = randIndex[uint32(i)<<6+uint32(s[2])] 86 if c0 < 0xF8 { // 4-byte UTF-8 87 return t.lookupValue(uint32(i), s[3]) 88 } 89 return 0 90 } 91 92 // lookupString returns the trie value for the first UTF-8 encoding in s and 93 // the width in bytes of this encoding. The size will be 0 if s does not 94 // hold enough bytes to complete the encoding. len(s) must be greater than 0. 95 func (t *randTrie) lookupString(s string) (v uint8, sz int) { 96 c0 := s[0] 97 switch { 98 case c0 < 0x80: // is ASCII 99 return randValues[c0], 1 100 case c0 < 0xC2: 101 return 0, 1 // Illegal UTF-8: not a starter, not ASCII. 102 case c0 < 0xE0: // 2-byte UTF-8 103 if len(s) < 2 { 104 return 0, 0 105 } 106 i := randIndex[c0] 107 c1 := s[1] 108 if c1 < 0x80 || 0xC0 <= c1 { 109 return 0, 1 // Illegal UTF-8: not a continuation byte. 110 } 111 return t.lookupValue(uint32(i), c1), 2 112 case c0 < 0xF0: // 3-byte UTF-8 113 if len(s) < 3 { 114 return 0, 0 115 } 116 i := randIndex[c0] 117 c1 := s[1] 118 if c1 < 0x80 || 0xC0 <= c1 { 119 return 0, 1 // Illegal UTF-8: not a continuation byte. 120 } 121 o := uint32(i)<<6 + uint32(c1) 122 i = randIndex[o] 123 c2 := s[2] 124 if c2 < 0x80 || 0xC0 <= c2 { 125 return 0, 2 // Illegal UTF-8: not a continuation byte. 
126 } 127 return t.lookupValue(uint32(i), c2), 3 128 case c0 < 0xF8: // 4-byte UTF-8 129 if len(s) < 4 { 130 return 0, 0 131 } 132 i := randIndex[c0] 133 c1 := s[1] 134 if c1 < 0x80 || 0xC0 <= c1 { 135 return 0, 1 // Illegal UTF-8: not a continuation byte. 136 } 137 o := uint32(i)<<6 + uint32(c1) 138 i = randIndex[o] 139 c2 := s[2] 140 if c2 < 0x80 || 0xC0 <= c2 { 141 return 0, 2 // Illegal UTF-8: not a continuation byte. 142 } 143 o = uint32(i)<<6 + uint32(c2) 144 i = randIndex[o] 145 c3 := s[3] 146 if c3 < 0x80 || 0xC0 <= c3 { 147 return 0, 3 // Illegal UTF-8: not a continuation byte. 148 } 149 return t.lookupValue(uint32(i), c3), 4 150 } 151 // Illegal rune 152 return 0, 1 153 } 154 155 // lookupStringUnsafe returns the trie value for the first UTF-8 encoding in s. 156 // s must start with a full and valid UTF-8 encoded rune. 157 func (t *randTrie) lookupStringUnsafe(s string) uint8 { 158 c0 := s[0] 159 if c0 < 0x80 { // is ASCII 160 return randValues[c0] 161 } 162 i := randIndex[c0] 163 if c0 < 0xE0 { // 2-byte UTF-8 164 return t.lookupValue(uint32(i), s[1]) 165 } 166 i = randIndex[uint32(i)<<6+uint32(s[1])] 167 if c0 < 0xF0 { // 3-byte UTF-8 168 return t.lookupValue(uint32(i), s[2]) 169 } 170 i = randIndex[uint32(i)<<6+uint32(s[2])] 171 if c0 < 0xF8 { // 4-byte UTF-8 172 return t.lookupValue(uint32(i), s[3]) 173 } 174 return 0 175 } 176 177 // randTrie. Total size: 9280 bytes (9.06 KiB). Checksum: 6debd324a8debb8f. 178 type randTrie struct{} 179 180 func newRandTrie(i int) *randTrie { 181 return &randTrie{} 182 } 183 184 // lookupValue determines the type of block n and looks up the value for b. 185 func (t *randTrie) lookupValue(n uint32, b byte) uint8 { 186 switch { 187 default: 188 return uint8(randValues[n<<6+uint32(b)]) 189 } 190 } 191 192 // randValues: 56 blocks, 3584 entries, 3584 bytes 193 // The third block is the zero block. 
194 var randValues = [3584]uint8{ 195 // Block 0x0, offset 0x0 196 // Block 0x1, offset 0x40 197 // Block 0x2, offset 0x80 198 // Block 0x3, offset 0xc0 199 0xc9: 0x0001, 200 // Block 0x4, offset 0x100 201 0x100: 0x0001, 202 // Block 0x5, offset 0x140 203 0x155: 0x0001, 204 // Block 0x6, offset 0x180 205 0x196: 0x0001, 206 // Block 0x7, offset 0x1c0 207 0x1ef: 0x0001, 208 // Block 0x8, offset 0x200 209 0x206: 0x0001, 210 // Block 0x9, offset 0x240 211 0x258: 0x0001, 212 // Block 0xa, offset 0x280 213 0x288: 0x0001, 214 // Block 0xb, offset 0x2c0 215 0x2f2: 0x0001, 216 // Block 0xc, offset 0x300 217 0x304: 0x0001, 218 // Block 0xd, offset 0x340 219 0x34b: 0x0001, 220 // Block 0xe, offset 0x380 221 0x3ba: 0x0001, 222 // Block 0xf, offset 0x3c0 223 0x3f5: 0x0001, 224 // Block 0x10, offset 0x400 225 0x41d: 0x0001, 226 // Block 0x11, offset 0x440 227 0x442: 0x0001, 228 // Block 0x12, offset 0x480 229 0x4bb: 0x0001, 230 // Block 0x13, offset 0x4c0 231 0x4e9: 0x0001, 232 // Block 0x14, offset 0x500 233 0x53e: 0x0001, 234 // Block 0x15, offset 0x540 235 0x55f: 0x0001, 236 // Block 0x16, offset 0x580 237 0x5b7: 0x0001, 238 // Block 0x17, offset 0x5c0 239 0x5d9: 0x0001, 240 // Block 0x18, offset 0x600 241 0x60e: 0x0001, 242 // Block 0x19, offset 0x640 243 0x652: 0x0001, 244 // Block 0x1a, offset 0x680 245 0x68f: 0x0001, 246 // Block 0x1b, offset 0x6c0 247 0x6dc: 0x0001, 248 // Block 0x1c, offset 0x700 249 0x703: 0x0001, 250 // Block 0x1d, offset 0x740 251 0x741: 0x0001, 252 // Block 0x1e, offset 0x780 253 0x79b: 0x0001, 254 // Block 0x1f, offset 0x7c0 255 0x7f1: 0x0001, 256 // Block 0x20, offset 0x800 257 0x833: 0x0001, 258 // Block 0x21, offset 0x840 259 0x853: 0x0001, 260 // Block 0x22, offset 0x880 261 0x8a2: 0x0001, 262 // Block 0x23, offset 0x8c0 263 0x8f8: 0x0001, 264 // Block 0x24, offset 0x900 265 0x917: 0x0001, 266 // Block 0x25, offset 0x940 267 0x945: 0x0001, 268 // Block 0x26, offset 0x980 269 0x99e: 0x0001, 270 // Block 0x27, offset 0x9c0 271 0x9fd: 0x0001, 272 
// Block 0x28, offset 0xa00 273 0xa0d: 0x0001, 274 // Block 0x29, offset 0xa40 275 0xa66: 0x0001, 276 // Block 0x2a, offset 0xa80 277 0xaab: 0x0001, 278 // Block 0x2b, offset 0xac0 279 0xaea: 0x0001, 280 // Block 0x2c, offset 0xb00 281 0xb2d: 0x0001, 282 // Block 0x2d, offset 0xb40 283 0xb54: 0x0001, 284 // Block 0x2e, offset 0xb80 285 0xb90: 0x0001, 286 // Block 0x2f, offset 0xbc0 287 0xbe5: 0x0001, 288 // Block 0x30, offset 0xc00 289 0xc28: 0x0001, 290 // Block 0x31, offset 0xc40 291 0xc7c: 0x0001, 292 // Block 0x32, offset 0xc80 293 0xcbf: 0x0001, 294 // Block 0x33, offset 0xcc0 295 0xcc7: 0x0001, 296 // Block 0x34, offset 0xd00 297 0xd34: 0x0001, 298 // Block 0x35, offset 0xd40 299 0xd61: 0x0001, 300 // Block 0x36, offset 0xd80 301 0xdb9: 0x0001, 302 // Block 0x37, offset 0xdc0 303 0xdda: 0x0001, 304 } 305 306 // randIndex: 89 blocks, 5696 entries, 5696 bytes 307 // Block 0 is the zero block. 308 var randIndex = [5696]uint8{ 309 // Block 0x0, offset 0x0 310 // Block 0x1, offset 0x40 311 // Block 0x2, offset 0x80 312 // Block 0x3, offset 0xc0 313 0xe1: 0x02, 0xe3: 0x03, 0xe4: 0x04, 314 0xea: 0x05, 0xeb: 0x06, 0xec: 0x07, 315 0xf0: 0x10, 0xf1: 0x24, 0xf2: 0x3d, 0xf3: 0x4f, 0xf4: 0x56, 316 // Block 0x4, offset 0x100 317 0x107: 0x01, 318 // Block 0x5, offset 0x140 319 0x16c: 0x02, 320 // Block 0x6, offset 0x180 321 0x19c: 0x03, 322 0x1ae: 0x04, 323 // Block 0x7, offset 0x1c0 324 0x1d8: 0x05, 325 0x1f7: 0x06, 326 // Block 0x8, offset 0x200 327 0x20c: 0x07, 328 // Block 0x9, offset 0x240 329 0x24a: 0x08, 330 // Block 0xa, offset 0x280 331 0x2b6: 0x09, 332 // Block 0xb, offset 0x2c0 333 0x2d5: 0x0a, 334 // Block 0xc, offset 0x300 335 0x31a: 0x0b, 336 // Block 0xd, offset 0x340 337 0x373: 0x0c, 338 // Block 0xe, offset 0x380 339 0x38b: 0x0d, 340 // Block 0xf, offset 0x3c0 341 0x3f0: 0x0e, 342 // Block 0x10, offset 0x400 343 0x433: 0x0f, 344 // Block 0x11, offset 0x440 345 0x45d: 0x10, 346 // Block 0x12, offset 0x480 347 0x491: 0x08, 0x494: 0x09, 0x497: 0x0a, 348 0x49b: 
0x0b, 0x49c: 0x0c, 349 0x4a1: 0x0d, 350 0x4ad: 0x0e, 351 0x4ba: 0x0f, 352 // Block 0x13, offset 0x4c0 353 0x4c1: 0x11, 354 // Block 0x14, offset 0x500 355 0x531: 0x12, 356 // Block 0x15, offset 0x540 357 0x546: 0x13, 358 // Block 0x16, offset 0x580 359 0x5ab: 0x14, 360 // Block 0x17, offset 0x5c0 361 0x5d4: 0x11, 362 0x5fe: 0x11, 363 // Block 0x18, offset 0x600 364 0x618: 0x0a, 365 // Block 0x19, offset 0x640 366 0x65b: 0x15, 367 // Block 0x1a, offset 0x680 368 0x6a0: 0x16, 369 // Block 0x1b, offset 0x6c0 370 0x6d2: 0x17, 371 0x6f6: 0x18, 372 // Block 0x1c, offset 0x700 373 0x711: 0x19, 374 // Block 0x1d, offset 0x740 375 0x768: 0x1a, 376 // Block 0x1e, offset 0x780 377 0x783: 0x1b, 378 // Block 0x1f, offset 0x7c0 379 0x7f9: 0x1c, 380 // Block 0x20, offset 0x800 381 0x831: 0x1d, 382 // Block 0x21, offset 0x840 383 0x85e: 0x1e, 384 // Block 0x22, offset 0x880 385 0x898: 0x1f, 386 // Block 0x23, offset 0x8c0 387 0x8c7: 0x18, 388 0x8d5: 0x14, 389 0x8f7: 0x20, 390 0x8fe: 0x1f, 391 // Block 0x24, offset 0x900 392 0x905: 0x21, 393 // Block 0x25, offset 0x940 394 0x966: 0x03, 395 // Block 0x26, offset 0x980 396 0x981: 0x07, 0x983: 0x11, 397 0x989: 0x12, 0x98a: 0x13, 0x98e: 0x14, 0x98f: 0x15, 398 0x992: 0x16, 0x995: 0x17, 0x996: 0x18, 399 0x998: 0x19, 0x999: 0x1a, 0x99b: 0x1b, 0x99f: 0x1c, 400 0x9a3: 0x1d, 401 0x9ad: 0x1e, 0x9af: 0x1f, 402 0x9b0: 0x20, 0x9b1: 0x21, 403 0x9b8: 0x22, 0x9bd: 0x23, 404 // Block 0x27, offset 0x9c0 405 0x9cd: 0x22, 406 // Block 0x28, offset 0xa00 407 0xa0c: 0x08, 408 // Block 0x29, offset 0xa40 409 0xa6f: 0x1c, 410 // Block 0x2a, offset 0xa80 411 0xa90: 0x1a, 412 0xaaf: 0x23, 413 // Block 0x2b, offset 0xac0 414 0xae3: 0x19, 415 0xae8: 0x24, 416 0xafc: 0x25, 417 // Block 0x2c, offset 0xb00 418 0xb13: 0x26, 419 // Block 0x2d, offset 0xb40 420 0xb67: 0x1c, 421 // Block 0x2e, offset 0xb80 422 0xb8f: 0x0b, 423 // Block 0x2f, offset 0xbc0 424 0xbcb: 0x27, 425 0xbe7: 0x26, 426 // Block 0x30, offset 0xc00 427 0xc34: 0x16, 428 // Block 0x31, offset 0xc40 
429 0xc62: 0x03, 430 // Block 0x32, offset 0xc80 431 0xcbb: 0x12, 432 // Block 0x33, offset 0xcc0 433 0xcdf: 0x09, 434 // Block 0x34, offset 0xd00 435 0xd34: 0x0a, 436 // Block 0x35, offset 0xd40 437 0xd41: 0x1e, 438 // Block 0x36, offset 0xd80 439 0xd83: 0x28, 440 // Block 0x37, offset 0xdc0 441 0xdc0: 0x15, 442 // Block 0x38, offset 0xe00 443 0xe1a: 0x15, 444 // Block 0x39, offset 0xe40 445 0xe65: 0x29, 446 // Block 0x3a, offset 0xe80 447 0xe86: 0x1f, 448 // Block 0x3b, offset 0xec0 449 0xeec: 0x18, 450 // Block 0x3c, offset 0xf00 451 0xf28: 0x2a, 452 // Block 0x3d, offset 0xf40 453 0xf53: 0x08, 454 // Block 0x3e, offset 0xf80 455 0xfa2: 0x2b, 456 0xfaa: 0x17, 457 // Block 0x3f, offset 0xfc0 458 0xfc0: 0x25, 0xfc2: 0x26, 459 0xfc9: 0x27, 0xfcd: 0x28, 0xfce: 0x29, 460 0xfd5: 0x2a, 461 0xfd8: 0x2b, 0xfd9: 0x2c, 0xfdf: 0x2d, 462 0xfe1: 0x2e, 0xfe2: 0x2f, 0xfe3: 0x30, 0xfe6: 0x31, 463 0xfe9: 0x32, 0xfec: 0x33, 0xfed: 0x34, 0xfef: 0x35, 464 0xff1: 0x36, 0xff2: 0x37, 0xff3: 0x38, 0xff4: 0x39, 465 0xffa: 0x3a, 0xffc: 0x3b, 0xffe: 0x3c, 466 // Block 0x40, offset 0x1000 467 0x102c: 0x2c, 468 // Block 0x41, offset 0x1040 469 0x1074: 0x2c, 470 // Block 0x42, offset 0x1080 471 0x108c: 0x08, 472 0x10a0: 0x2d, 473 // Block 0x43, offset 0x10c0 474 0x10e8: 0x10, 475 // Block 0x44, offset 0x1100 476 0x110f: 0x13, 477 // Block 0x45, offset 0x1140 478 0x114b: 0x2e, 479 // Block 0x46, offset 0x1180 480 0x118b: 0x23, 481 0x119d: 0x0c, 482 // Block 0x47, offset 0x11c0 483 0x11c3: 0x12, 484 0x11f9: 0x0f, 485 // Block 0x48, offset 0x1200 486 0x121e: 0x1b, 487 // Block 0x49, offset 0x1240 488 0x1270: 0x2f, 489 // Block 0x4a, offset 0x1280 490 0x128a: 0x1b, 491 0x12a7: 0x02, 492 // Block 0x4b, offset 0x12c0 493 0x12fb: 0x14, 494 // Block 0x4c, offset 0x1300 495 0x1333: 0x30, 496 // Block 0x4d, offset 0x1340 497 0x134d: 0x31, 498 // Block 0x4e, offset 0x1380 499 0x138e: 0x15, 500 // Block 0x4f, offset 0x13c0 501 0x13f4: 0x32, 502 // Block 0x50, offset 0x1400 503 0x141b: 0x33, 504 // Block 
0x51, offset 0x1440 505 0x1448: 0x3e, 0x1449: 0x3f, 0x144a: 0x40, 0x144f: 0x41, 506 0x1459: 0x42, 0x145c: 0x43, 0x145e: 0x44, 0x145f: 0x45, 507 0x1468: 0x46, 0x1469: 0x47, 0x146c: 0x48, 0x146d: 0x49, 0x146e: 0x4a, 508 0x1472: 0x4b, 0x1473: 0x4c, 509 0x1479: 0x4d, 0x147b: 0x4e, 510 // Block 0x52, offset 0x1480 511 0x1480: 0x34, 512 0x1499: 0x11, 513 0x14b6: 0x2c, 514 // Block 0x53, offset 0x14c0 515 0x14e4: 0x0d, 516 // Block 0x54, offset 0x1500 517 0x1527: 0x08, 518 // Block 0x55, offset 0x1540 519 0x1555: 0x2b, 520 // Block 0x56, offset 0x1580 521 0x15b2: 0x35, 522 // Block 0x57, offset 0x15c0 523 0x15f2: 0x1c, 0x15f4: 0x29, 524 // Block 0x58, offset 0x1600 525 0x1600: 0x50, 0x1603: 0x51, 526 0x1608: 0x52, 0x160a: 0x53, 0x160d: 0x54, 0x160e: 0x55, 527 } 528 529 // lookup returns the trie value for the first UTF-8 encoding in s and 530 // the width in bytes of this encoding. The size will be 0 if s does not 531 // hold enough bytes to complete the encoding. len(s) must be greater than 0. 532 func (t *multiTrie) lookup(s []byte) (v uint64, sz int) { 533 c0 := s[0] 534 switch { 535 case c0 < 0x80: // is ASCII 536 return t.ascii[c0], 1 537 case c0 < 0xC2: 538 return 0, 1 // Illegal UTF-8: not a starter, not ASCII. 539 case c0 < 0xE0: // 2-byte UTF-8 540 if len(s) < 2 { 541 return 0, 0 542 } 543 i := t.utf8Start[c0] 544 c1 := s[1] 545 if c1 < 0x80 || 0xC0 <= c1 { 546 return 0, 1 // Illegal UTF-8: not a continuation byte. 547 } 548 return t.lookupValue(uint32(i), c1), 2 549 case c0 < 0xF0: // 3-byte UTF-8 550 if len(s) < 3 { 551 return 0, 0 552 } 553 i := t.utf8Start[c0] 554 c1 := s[1] 555 if c1 < 0x80 || 0xC0 <= c1 { 556 return 0, 1 // Illegal UTF-8: not a continuation byte. 557 } 558 o := uint32(i)<<6 + uint32(c1) 559 i = multiIndex[o] 560 c2 := s[2] 561 if c2 < 0x80 || 0xC0 <= c2 { 562 return 0, 2 // Illegal UTF-8: not a continuation byte. 
563 } 564 return t.lookupValue(uint32(i), c2), 3 565 case c0 < 0xF8: // 4-byte UTF-8 566 if len(s) < 4 { 567 return 0, 0 568 } 569 i := t.utf8Start[c0] 570 c1 := s[1] 571 if c1 < 0x80 || 0xC0 <= c1 { 572 return 0, 1 // Illegal UTF-8: not a continuation byte. 573 } 574 o := uint32(i)<<6 + uint32(c1) 575 i = multiIndex[o] 576 c2 := s[2] 577 if c2 < 0x80 || 0xC0 <= c2 { 578 return 0, 2 // Illegal UTF-8: not a continuation byte. 579 } 580 o = uint32(i)<<6 + uint32(c2) 581 i = multiIndex[o] 582 c3 := s[3] 583 if c3 < 0x80 || 0xC0 <= c3 { 584 return 0, 3 // Illegal UTF-8: not a continuation byte. 585 } 586 return t.lookupValue(uint32(i), c3), 4 587 } 588 // Illegal rune 589 return 0, 1 590 } 591 592 // lookupUnsafe returns the trie value for the first UTF-8 encoding in s. 593 // s must start with a full and valid UTF-8 encoded rune. 594 func (t *multiTrie) lookupUnsafe(s []byte) uint64 { 595 c0 := s[0] 596 if c0 < 0x80 { // is ASCII 597 return t.ascii[c0] 598 } 599 i := t.utf8Start[c0] 600 if c0 < 0xE0 { // 2-byte UTF-8 601 return t.lookupValue(uint32(i), s[1]) 602 } 603 i = multiIndex[uint32(i)<<6+uint32(s[1])] 604 if c0 < 0xF0 { // 3-byte UTF-8 605 return t.lookupValue(uint32(i), s[2]) 606 } 607 i = multiIndex[uint32(i)<<6+uint32(s[2])] 608 if c0 < 0xF8 { // 4-byte UTF-8 609 return t.lookupValue(uint32(i), s[3]) 610 } 611 return 0 612 } 613 614 // lookupString returns the trie value for the first UTF-8 encoding in s and 615 // the width in bytes of this encoding. The size will be 0 if s does not 616 // hold enough bytes to complete the encoding. len(s) must be greater than 0. 617 func (t *multiTrie) lookupString(s string) (v uint64, sz int) { 618 c0 := s[0] 619 switch { 620 case c0 < 0x80: // is ASCII 621 return t.ascii[c0], 1 622 case c0 < 0xC2: 623 return 0, 1 // Illegal UTF-8: not a starter, not ASCII. 
624 case c0 < 0xE0: // 2-byte UTF-8 625 if len(s) < 2 { 626 return 0, 0 627 } 628 i := t.utf8Start[c0] 629 c1 := s[1] 630 if c1 < 0x80 || 0xC0 <= c1 { 631 return 0, 1 // Illegal UTF-8: not a continuation byte. 632 } 633 return t.lookupValue(uint32(i), c1), 2 634 case c0 < 0xF0: // 3-byte UTF-8 635 if len(s) < 3 { 636 return 0, 0 637 } 638 i := t.utf8Start[c0] 639 c1 := s[1] 640 if c1 < 0x80 || 0xC0 <= c1 { 641 return 0, 1 // Illegal UTF-8: not a continuation byte. 642 } 643 o := uint32(i)<<6 + uint32(c1) 644 i = multiIndex[o] 645 c2 := s[2] 646 if c2 < 0x80 || 0xC0 <= c2 { 647 return 0, 2 // Illegal UTF-8: not a continuation byte. 648 } 649 return t.lookupValue(uint32(i), c2), 3 650 case c0 < 0xF8: // 4-byte UTF-8 651 if len(s) < 4 { 652 return 0, 0 653 } 654 i := t.utf8Start[c0] 655 c1 := s[1] 656 if c1 < 0x80 || 0xC0 <= c1 { 657 return 0, 1 // Illegal UTF-8: not a continuation byte. 658 } 659 o := uint32(i)<<6 + uint32(c1) 660 i = multiIndex[o] 661 c2 := s[2] 662 if c2 < 0x80 || 0xC0 <= c2 { 663 return 0, 2 // Illegal UTF-8: not a continuation byte. 664 } 665 o = uint32(i)<<6 + uint32(c2) 666 i = multiIndex[o] 667 c3 := s[3] 668 if c3 < 0x80 || 0xC0 <= c3 { 669 return 0, 3 // Illegal UTF-8: not a continuation byte. 670 } 671 return t.lookupValue(uint32(i), c3), 4 672 } 673 // Illegal rune 674 return 0, 1 675 } 676 677 // lookupStringUnsafe returns the trie value for the first UTF-8 encoding in s. 678 // s must start with a full and valid UTF-8 encoded rune. 
679 func (t *multiTrie) lookupStringUnsafe(s string) uint64 { 680 c0 := s[0] 681 if c0 < 0x80 { // is ASCII 682 return t.ascii[c0] 683 } 684 i := t.utf8Start[c0] 685 if c0 < 0xE0 { // 2-byte UTF-8 686 return t.lookupValue(uint32(i), s[1]) 687 } 688 i = multiIndex[uint32(i)<<6+uint32(s[1])] 689 if c0 < 0xF0 { // 3-byte UTF-8 690 return t.lookupValue(uint32(i), s[2]) 691 } 692 i = multiIndex[uint32(i)<<6+uint32(s[2])] 693 if c0 < 0xF8 { // 4-byte UTF-8 694 return t.lookupValue(uint32(i), s[3]) 695 } 696 return 0 697 } 698 699 // multiTrie. Total size: 18250 bytes (17.82 KiB). Checksum: a69a609d8696aa5e. 700 type multiTrie struct { 701 ascii []uint64 // index for ASCII bytes 702 utf8Start []uint8 // index for UTF-8 bytes >= 0xC0 703 } 704 705 func newMultiTrie(i int) *multiTrie { 706 h := multiTrieHandles[i] 707 return &multiTrie{multiValues[uint32(h.ascii)<<6:], multiIndex[uint32(h.multi)<<6:]} 708 } 709 710 type multiTrieHandle struct { 711 ascii, multi uint8 712 } 713 714 // multiTrieHandles: 5 handles, 10 bytes 715 var multiTrieHandles = [5]multiTrieHandle{ 716 {0, 0}, // 8c1e77823143d35c: all 717 {0, 23}, // 8fb58ff8243b45b0: ASCII only 718 {0, 23}, // 8fb58ff8243b45b0: ASCII only 2 719 {0, 24}, // 2ccc43994f11046f: BMP only 720 {30, 25}, // ce448591bdcb4733: No BMP 721 } 722 723 // lookupValue determines the type of block n and looks up the value for b. 724 func (t *multiTrie) lookupValue(n uint32, b byte) uint64 { 725 switch { 726 default: 727 return uint64(multiValues[n<<6+uint32(b)]) 728 } 729 } 730 731 // multiValues: 32 blocks, 2048 entries, 16384 bytes 732 // The third block is the zero block. 
// Entries are addressed as block<<6 + byte, i.e. 64 entries per block. Only
// non-zero entries are spelled out; every index not listed holds 0.
var multiValues = [2048]uint64{
	// Block 0x0, offset 0x0
	0x03: 0x6e361699800b9fb8, 0x04: 0x52d3935a34f6f0b, 0x05: 0x2948319393e7ef10,
	0x07: 0x20f03b006704f663, 0x08: 0x6c15c0732bb2495f, 0x09: 0xe54e2c59d953551,
	0x0f: 0x33d8a825807d8037, 0x10: 0x6ecd93cb12168b92, 0x11: 0x6a81c9c0ce86e884,
	0x1f: 0xa03e77aac8be79b, 0x20: 0x28591d0e7e486efa, 0x21: 0x716fa3bc398dec8,
	0x3f: 0x4fd3bcfa72bce8b0,
	// Block 0x1, offset 0x40
	0x40: 0x3cbaef3db8ba5f12, 0x41: 0x2d262347c1f56357,
	0x7f: 0x782caa2d25a418a9,
	// Block 0x2, offset 0x80
	// Block 0x3, offset 0xc0
	0xc0: 0x6bbd1f937b1ff5d2, 0xc1: 0x732e23088d2eb8a4,
	// Block 0x4, offset 0x100
	0x13f: 0x56f8c4c82f5962dc,
	// Block 0x5, offset 0x140
	0x140: 0x57dc4544729a5da2, 0x141: 0x2f62f9cd307ffa0d,
	// Block 0x6, offset 0x180
	0x1bf: 0x7bf4d0ebf302a088,
	// Block 0x7, offset 0x1c0
	0x1c0: 0x1f0d67f249e59931, 0x1c1: 0x3011def73aa550c7,
	// Block 0x8, offset 0x200
	0x23f: 0x5de81c1dff6bf29d,
	// Block 0x9, offset 0x240
	0x240: 0x752c035737b825e8, 0x241: 0x1e793399081e3bb3,
	// Block 0xa, offset 0x280
	0x2bf: 0x6a28f01979cbf059,
	// Block 0xb, offset 0x2c0
	0x2c0: 0x373a4b0f2cbd4c74, 0x2c1: 0x4fd2c288683b767c,
	// Block 0xc, offset 0x300
	0x33f: 0x5a10ffa9e29184fb,
	// Block 0xd, offset 0x340
	0x340: 0x700f9bdb53fff6a5, 0x341: 0xcde93df0427eb79,
	// Block 0xe, offset 0x380
	0x3bf: 0x74071288fff39c76,
	// Block 0xf, offset 0x3c0
	0x3c0: 0x481fc2f510e5268a, 0x3c1: 0x7565c28164204849,
	// Block 0x10, offset 0x400
	0x43f: 0x5676a62fd49c6bec,
	// Block 0x11, offset 0x440
	0x440: 0x2f2d15776cbafc6b, 0x441: 0x4c55e8dc0ff11a3f,
	// Block 0x12, offset 0x480
	0x4bf: 0x69d6f0fe711fafc9,
	// Block 0x13, offset 0x4c0
	0x4c0: 0x33181de28cfb062d, 0x4c1: 0x2ef3adc6bb2f2d02,
	// Block 0x14, offset 0x500
	0x53f: 0xe03b31814c95f8b,
	// Block 0x15, offset 0x540
	0x540: 0x3bf6dc9a1c115603, 0x541: 0x6984ec9b7f51f7fc,
	// Block 0x16, offset 0x580
	0x5bf: 0x3c02ea92fb168559,
	// Block 0x17, offset 0x5c0
	0x5c0: 0x1badfe42e7629494, 0x5c1: 0x6dc4a554005f7645,
	// Block 0x18, offset 0x600
	0x63f: 0x3bb2ed2a72748f4b,
	// Block 0x19, offset 0x640
	0x640: 0x291354cd6767ec10, 0x641: 0x2c3a4715e3c070d6,
	// Block 0x1a, offset 0x680
	0x6bf: 0x352711cfb7236418,
	// Block 0x1b, offset 0x6c0
	0x6c0: 0x3a59d34fb8bceda, 0x6c1: 0x5e90d8ebedd64fa1,
	// Block 0x1c, offset 0x700
	0x73f: 0x7191a77b28d23110,
	// Block 0x1d, offset 0x740
	0x740: 0x4ca7f0c1623423d8, 0x741: 0x4f7156d996e2d0de,
	// Block 0x1e, offset 0x780
	// Block 0x1f, offset 0x7c0
}

// multiIndex: 29 blocks, 1856 entries, 1856 bytes
// Block 0 is the zero block.
//
// Each entry is a block number. The lookup methods combine it with the next
// input byte as entry<<6 + byte to address either a further multiIndex entry
// or, for the last byte of a sequence, a multiValues entry. Only non-zero
// entries are spelled out; every index not listed holds 0.
var multiIndex = [1856]uint8{
	// Block 0x0, offset 0x0
	// Block 0x1, offset 0x40
	// Block 0x2, offset 0x80
	// Block 0x3, offset 0xc0
	0xc2: 0x01, 0xc3: 0x02, 0xc4: 0x03, 0xc7: 0x04,
	0xc8: 0x05, 0xcf: 0x06,
	0xd0: 0x07,
	0xdf: 0x08,
	0xe0: 0x02, 0xe1: 0x03, 0xe2: 0x04, 0xe3: 0x05, 0xe4: 0x06, 0xe7: 0x07,
	0xe8: 0x08, 0xef: 0x09,
	0xf0: 0x0e, 0xf1: 0x11, 0xf2: 0x13, 0xf3: 0x15, 0xf4: 0x17,
	// Block 0x4, offset 0x100
	0x120: 0x09,
	0x13f: 0x0a,
	// Block 0x5, offset 0x140
	0x140: 0x0b,
	0x17f: 0x0c,
	// Block 0x6, offset 0x180
	0x180: 0x0d,
	// Block 0x7, offset 0x1c0
	0x1ff: 0x0e,
	// Block 0x8, offset 0x200
	0x200: 0x0f,
	// Block 0x9, offset 0x240
	0x27f: 0x10,
	// Block 0xa, offset 0x280
	0x280: 0x11,
	// Block 0xb, offset 0x2c0
	0x2ff: 0x12,
	// Block 0xc, offset 0x300
	0x300: 0x13,
	// Block 0xd, offset 0x340
	0x37f: 0x14,
	// Block 0xe, offset 0x380
	0x380: 0x15,
	// Block 0xf, offset 0x3c0
	0x3ff: 0x16,
	// Block 0x10, offset 0x400
	0x410: 0x0a,
	0x41f: 0x0b,
	0x420: 0x0c,
	0x43f: 0x0d,
	// Block 0x11, offset 0x440
	0x440: 0x17,
	// Block 0x12, offset 0x480
	0x4bf: 0x18,
	// Block 0x13, offset 0x4c0
	0x4c0: 0x0f,
	0x4ff: 0x10,
	// Block 0x14, offset 0x500
	0x500: 0x19,
	// Block 0x15, offset 0x540
	0x540: 0x12,
	// Block 0x16, offset 0x580
	0x5bf: 0x1a,
	// Block 0x17, offset 0x5c0
	0x5ff: 0x14,
	// Block 0x18, offset 0x600
	0x600: 0x1b,
	// Block 0x19, offset 0x640
	0x640: 0x16,
	// Block 0x1a, offset 0x680
	// Block 0x1b, offset 0x6c0
	0x6c2: 0x01, 0x6c3: 0x02, 0x6c4: 0x03, 0x6c7: 0x04,
	0x6c8: 0x05, 0x6cf: 0x06,
	0x6d0: 0x07,
	0x6df: 0x08,
	0x6e0: 0x02, 0x6e1: 0x03, 0x6e2: 0x04, 0x6e3: 0x05, 0x6e4: 0x06, 0x6e7: 0x07,
	0x6e8: 0x08, 0x6ef: 0x09,
	// Block 0x1c, offset 0x700
	0x730: 0x0e, 0x731: 0x11, 0x732: 0x13, 0x733: 0x15, 0x734: 0x17,
}