github.com/zerosnake0/jzon@v0.0.9-0.20230801092939-1b135cb83f7f/iterator_str.go (about) 1 package jzon 2 3 import ( 4 "unicode/utf16" 5 ) 6 7 const ( 8 noEscape = 0 9 invalidHex = -1 10 ) 11 12 var ( 13 escapeMap [charNum]byte 14 hexValue [charNum]int8 15 ) 16 17 func init() { 18 // escaped characters 19 for i := 0; i < charNum; i++ { 20 escapeMap[i] = noEscape 21 } 22 for k, v := range map[byte]byte{ 23 '"': '"', 24 '\\': '\\', 25 '/': '/', 26 'b': '\b', 27 'f': '\f', 28 'n': '\n', 29 'r': '\r', 30 't': '\t', 31 } { 32 escapeMap[k] = v 33 } 34 // hex values 35 for i := 0; i < charNum; i++ { 36 hexValue[i] = invalidHex 37 } 38 for c := '0'; c <= '9'; c++ { 39 hexValue[c] = int8(c - '0') 40 } 41 for c := 'a'; c <= 'f'; c++ { 42 hexValue[c] = int8(c - 'a' + 10) 43 } 44 for c := 'A'; c <= 'F'; c++ { 45 hexValue[c] = int8(c - 'A' + 10) 46 } 47 } 48 49 func (it *Iterator) readU4() (ret rune, err error) { 50 remain := 4 51 for { 52 i := it.head 53 for ; i < it.tail; i++ { 54 c := it.buffer[i] 55 u4v := hexValue[c] 56 if u4v == invalidHex { 57 return 0, InvalidUnicodeCharError{c: c} 58 } 59 ret = ret<<4 + int32(u4v) 60 if remain == 1 { 61 it.head = i + 1 62 return 63 } 64 remain-- 65 } 66 it.head = i 67 if err = it.readMore(); err != nil { 68 return 69 } 70 } 71 } 72 73 func (it *Iterator) readEscapedChar(b []byte) ([]byte, error) { 74 c, err := it.nextByte() 75 if err != nil { 76 return b, err 77 } 78 escaped := escapeMap[c] 79 if escaped != noEscape { 80 it.head++ 81 return append(b, escaped), nil 82 } 83 if c != 'u' { 84 return b, InvalidEscapeCharError{c: c} 85 } 86 it.head++ 87 r, err := it.readU4() 88 if err != nil { 89 return b, err 90 } 91 Retry: 92 if utf16.IsSurrogate(r) { 93 c, err := it.nextByte() 94 if err != nil { 95 return b, err 96 } 97 if c != '\\' { 98 return appendRune(b, r), nil 99 } 100 it.head++ 101 c, err = it.nextByte() 102 if err != nil { 103 return b, err 104 } 105 if c != 'u' { 106 b = appendRune(b, r) 107 escaped := escapeMap[c] 108 if escaped == noEscape { 109 return b, InvalidEscapeCharError{c: c} 110 } 111 it.head++ 112 return append(b, escaped), nil 113 } 114 it.head++ 115 r2, err := it.readU4() 116 if err != nil { 117 return b, err 118 } 119 combined := utf16.DecodeRune(r, r2) 120 if combined == runeError { 121 b = appendRune(b, r) 122 r = r2 123 goto Retry 124 } 125 return appendRune(b, combined), nil 126 } 127 return appendRune(b, r), nil 128 } 129 130 // internal, call only after a '"' is consumed 131 // the result is a part of the temp buffer, should be copied if 132 // the data needs to be saved 133 func (it *Iterator) readStringAsSlice() (_ []byte, err error) { 134 for i := it.head; i < it.tail; i++ { 135 c := it.buffer[i] 136 if c < ' ' { // json.org 137 return nil, InvalidStringCharError{c: c} 138 } 139 if c == '"' { 140 it.tmpBuffer = append(it.tmpBuffer[:0], it.buffer[it.head:i]...) 141 it.head = i + 1 142 return it.tmpBuffer, nil 143 } else if c == '\\' { 144 buf := append(it.tmpBuffer[:0], it.buffer[it.head:i]...) 145 it.head = i + 1 146 buf, err = it.readEscapedChar(buf) 147 if err != nil { 148 it.tmpBuffer = buf 149 return nil, err 150 } 151 i = it.head 152 buf, err = it.readStringAsSliceSlow(buf) 153 it.tmpBuffer = buf 154 return buf, err 155 } 156 } 157 buf := append(it.tmpBuffer[:0], it.buffer[it.head:it.tail]...) 158 it.head = it.tail 159 if err := it.readMore(); err != nil { 160 it.tmpBuffer = buf 161 return nil, err 162 } 163 buf, err = it.readStringAsSliceSlow(buf) 164 it.tmpBuffer = buf 165 return buf, err 166 } 167 168 func (it *Iterator) readStringAsSliceSlow(buf []byte) (_ []byte, err error) { 169 for { 170 i := it.head 171 for i < it.tail { 172 c := it.buffer[i] 173 if c < ' ' { // json.org 174 return buf, InvalidStringCharError{c: c} 175 } 176 if c == '"' { 177 buf = append(buf, it.buffer[it.head:i]...) 178 it.head = i + 1 179 return buf, nil 180 } else if c == '\\' { 181 buf = append(buf, it.buffer[it.head:i]...) 182 it.head = i + 1 183 buf, err = it.readEscapedChar(buf) 184 if err != nil { 185 return buf, err 186 } 187 i = it.head 188 } else { 189 i++ 190 } 191 } 192 // i == it.tail 193 buf = append(buf, it.buffer[it.head:i]...) 194 it.head = i 195 if err = it.readMore(); err != nil { 196 return buf, err 197 } 198 } 199 } 200 201 func (it *Iterator) expectQuote() error { 202 c, err := it.nextToken() 203 if err != nil { 204 return err 205 } 206 if c != '"' { 207 return UnexpectedByteError{exp: '"', got: c} 208 } 209 it.head++ // consume the leading '"' 210 return nil 211 } 212 213 // ReadStringAsSlice reads a string as a byte slice 214 // The returned slice can only be used temporarily, a copy must be made 215 // if the result needs to be saved 216 func (it *Iterator) ReadStringAsSlice() (_ []byte, err error) { 217 if err = it.expectQuote(); err != nil { 218 return 219 } 220 return it.readStringAsSlice() 221 } 222 223 // ReadStringAndAppend reads a string and appends to a byte slice 224 func (it *Iterator) ReadStringAndAppend(buf []byte) (_ []byte, err error) { 225 if err = it.expectQuote(); err != nil { 226 return 227 } 228 s, err := it.readStringAsSlice() 229 if err != nil { 230 return 231 } 232 return append(buf, s...), nil 233 } 234 235 // internal, call only after a '"' is consumed 236 func (it *Iterator) readString() (ret string, err error) { 237 buf, err := it.readStringAsSlice() 238 if err == nil { 239 ret = string(buf) 240 } 241 return 242 } 243 244 // ReadString reads a string 245 func (it *Iterator) ReadString() (_ string, err error) { 246 if err = it.expectQuote(); err != nil { 247 return 248 } 249 return it.readString() 250 } 251 252 // From unicode/utf8 (which is also used by jsoniter) 253 const ( 254 t1 = 0x00 // 0000 0000 255 tx = 0x80 // 1000 0000 256 t2 = 0xC0 // 1100 0000 257 t3 = 0xE0 // 1110 0000 258 t4 = 0xF0 // 1111 0000 259 t5 = 0xF8 // 1111 1000 260 261 maskx = 0x3F // 0011 1111 262 mask2 = 0x1F // 0001 1111 263 mask3 = 0x0F // 0000 1111 264 mask4 = 0x07 // 0000 0111 265 266 rune1Max = 1<<7 - 1 267 rune2Max = 1<<11 - 1 268 rune3Max = 1<<16 - 1 269 270 surrogateMin = 0xD800 271 surrogateMax = 0xDFFF 272 273 maxRune = '\U0010FFFF' // Maximum valid Unicode code point. 274 runeError = '\uFFFD' // the "error" Rune or "Unicode replacement character" 275 ) 276 277 func appendRune(p []byte, r rune) []byte { 278 // Negative values are erroneous. Making it unsigned addresses the problem. 279 switch i := uint32(r); { 280 case i <= rune1Max: 281 return append(p, byte(r)) 282 case i <= rune2Max: 283 return append(p, t2|byte(r>>6), tx|byte(r)&maskx) 284 case i > maxRune, surrogateMin <= i && i <= surrogateMax: 285 r = runeError 286 fallthrough 287 case i <= rune3Max: 288 return append(p, t3|byte(r>>12), tx|byte(r>>6)&maskx, 289 tx|byte(r)&maskx) 290 default: 291 return append(p, t4|byte(r>>18), tx|byte(r>>12)&maskx, 292 tx|byte(r>>6)&maskx, tx|byte(r)&maskx) 293 } 294 }