github.com/v2pro/plz@v0.0.0-20221028024117-e5f9aec5b631/parse/utf8.go (about) 1 package parse 2 3 const ( 4 // These names of these constants are chosen to give nice alignment in the 5 // table below. The first nibble is an index into acceptRanges or F for 6 // special one-byte cases. The second nibble is the Rune length or the 7 // Status for the special one-byte case. 8 xx = 0xF1 // invalid: size 1 9 as = 0xF0 // ASCII: size 1 10 s1 = 0x02 // accept 0, size 2 11 s2 = 0x13 // accept 1, size 3 12 s3 = 0x03 // accept 0, size 3 13 s4 = 0x23 // accept 2, size 3 14 s5 = 0x34 // accept 3, size 4 15 s6 = 0x04 // accept 0, size 4 16 s7 = 0x44 // accept 4, size 4 17 ) 18 19 // first is information about the first byte in a UTF-8 sequence. 20 var first = [256]uint8{ 21 // 1 2 3 4 5 6 7 8 9 A B C D E F 22 as, as, as, as, as, as, as, as, as, as, as, as, as, as, as, as, // 0x00-0x0F 23 as, as, as, as, as, as, as, as, as, as, as, as, as, as, as, as, // 0x10-0x1F 24 as, as, as, as, as, as, as, as, as, as, as, as, as, as, as, as, // 0x20-0x2F 25 as, as, as, as, as, as, as, as, as, as, as, as, as, as, as, as, // 0x30-0x3F 26 as, as, as, as, as, as, as, as, as, as, as, as, as, as, as, as, // 0x40-0x4F 27 as, as, as, as, as, as, as, as, as, as, as, as, as, as, as, as, // 0x50-0x5F 28 as, as, as, as, as, as, as, as, as, as, as, as, as, as, as, as, // 0x60-0x6F 29 as, as, as, as, as, as, as, as, as, as, as, as, as, as, as, as, // 0x70-0x7F 30 // 1 2 3 4 5 6 7 8 9 A B C D E F 31 xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, // 0x80-0x8F 32 xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, // 0x90-0x9F 33 xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, // 0xA0-0xAF 34 xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, // 0xB0-0xBF 35 xx, xx, s1, s1, s1, s1, s1, s1, s1, s1, s1, s1, s1, s1, s1, s1, // 0xC0-0xCF 36 s1, s1, s1, s1, s1, s1, s1, s1, s1, s1, s1, s1, s1, s1, s1, s1, // 0xD0-0xDF 37 s2, s3, s3, s3, s3, s3, s3, s3, s3, s3, s3, s3, s3, s4, s3, s3, // 0xE0-0xEF 38 s5, s6, s6, s6, s7, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, // 0xF0-0xFF 39 }