github.com/night-codes/go-json@v0.9.15/internal/decoder/unmarshal_text.go (about) 1 package decoder 2 3 import ( 4 "bytes" 5 "encoding" 6 "unicode" 7 "unicode/utf16" 8 "unicode/utf8" 9 "unsafe" 10 11 "github.com/night-codes/go-json/internal/errors" 12 "github.com/night-codes/go-json/internal/runtime" 13 ) 14 15 type unmarshalTextDecoder struct { 16 typ *runtime.Type 17 structName string 18 fieldName string 19 } 20 21 func newUnmarshalTextDecoder(typ *runtime.Type, structName, fieldName string) *unmarshalTextDecoder { 22 return &unmarshalTextDecoder{ 23 typ: typ, 24 structName: structName, 25 fieldName: fieldName, 26 } 27 } 28 29 func (d *unmarshalTextDecoder) annotateError(cursor int64, err error) { 30 switch e := err.(type) { 31 case *errors.UnmarshalTypeError: 32 e.Struct = d.structName 33 e.Field = d.fieldName 34 case *errors.SyntaxError: 35 e.Offset = cursor 36 } 37 } 38 39 var ( 40 nullbytes = []byte(`null`) 41 ) 42 43 func (d *unmarshalTextDecoder) DecodeStream(s *Stream, depth int64, p unsafe.Pointer) error { 44 s.skipWhiteSpace() 45 start := s.cursor 46 if err := s.skipValue(depth); err != nil { 47 return err 48 } 49 src := s.buf[start:s.cursor] 50 if len(src) > 0 { 51 switch src[0] { 52 case '[': 53 return &errors.UnmarshalTypeError{ 54 Value: "array", 55 Type: runtime.RType2Type(d.typ), 56 Offset: s.totalOffset(), 57 } 58 case '{': 59 return &errors.UnmarshalTypeError{ 60 Value: "object", 61 Type: runtime.RType2Type(d.typ), 62 Offset: s.totalOffset(), 63 } 64 case '-', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9': 65 return &errors.UnmarshalTypeError{ 66 Value: "number", 67 Type: runtime.RType2Type(d.typ), 68 Offset: s.totalOffset(), 69 } 70 case 'n': 71 if bytes.Equal(src, nullbytes) { 72 *(*unsafe.Pointer)(p) = nil 73 return nil 74 } 75 } 76 } 77 dst := make([]byte, len(src)) 78 copy(dst, src) 79 80 if b, ok := unquoteBytes(dst); ok { 81 dst = b 82 } 83 v := *(*interface{})(unsafe.Pointer(&emptyInterface{ 84 typ: d.typ, 85 ptr: p, 86 })) 87 if err := v.(encoding.TextUnmarshaler).UnmarshalText(dst); err != nil { 88 d.annotateError(s.cursor, err) 89 return err 90 } 91 return nil 92 } 93 94 func (d *unmarshalTextDecoder) Decode(ctx *RuntimeContext, cursor, depth int64, p unsafe.Pointer) (int64, error) { 95 buf := ctx.Buf 96 cursor = skipWhiteSpace(buf, cursor) 97 start := cursor 98 end, err := skipValue(buf, cursor, depth) 99 if err != nil { 100 return 0, err 101 } 102 src := buf[start:end] 103 if len(src) > 0 { 104 switch src[0] { 105 case '[': 106 return 0, &errors.UnmarshalTypeError{ 107 Value: "array", 108 Type: runtime.RType2Type(d.typ), 109 Offset: start, 110 } 111 case '{': 112 return 0, &errors.UnmarshalTypeError{ 113 Value: "object", 114 Type: runtime.RType2Type(d.typ), 115 Offset: start, 116 } 117 case '-', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9': 118 return 0, &errors.UnmarshalTypeError{ 119 Value: "number", 120 Type: runtime.RType2Type(d.typ), 121 Offset: start, 122 } 123 case 'n': 124 if bytes.Equal(src, nullbytes) { 125 *(*unsafe.Pointer)(p) = nil 126 return end, nil 127 } 128 } 129 } 130 131 if s, ok := unquoteBytes(src); ok { 132 src = s 133 } 134 v := *(*interface{})(unsafe.Pointer(&emptyInterface{ 135 typ: d.typ, 136 ptr: *(*unsafe.Pointer)(unsafe.Pointer(&p)), 137 })) 138 if err := v.(encoding.TextUnmarshaler).UnmarshalText(src); err != nil { 139 d.annotateError(cursor, err) 140 return 0, err 141 } 142 return end, nil 143 } 144 145 func unquoteBytes(s []byte) (t []byte, ok bool) { 146 length := len(s) 147 if length < 2 || s[0] != '"' || s[length-1] != '"' { 148 return 149 } 150 s = s[1 : length-1] 151 length -= 2 152 153 // Check for unusual characters. If there are none, 154 // then no unquoting is needed, so return a slice of the 155 // original bytes. 156 r := 0 157 for r < length { 158 c := s[r] 159 if c == '\\' || c == '"' || c < ' ' { 160 break 161 } 162 if c < utf8.RuneSelf { 163 r++ 164 continue 165 } 166 rr, size := utf8.DecodeRune(s[r:]) 167 if rr == utf8.RuneError && size == 1 { 168 break 169 } 170 r += size 171 } 172 if r == length { 173 return s, true 174 } 175 176 b := make([]byte, length+2*utf8.UTFMax) 177 w := copy(b, s[0:r]) 178 for r < length { 179 // Out of room? Can only happen if s is full of 180 // malformed UTF-8 and we're replacing each 181 // byte with RuneError. 182 if w >= len(b)-2*utf8.UTFMax { 183 nb := make([]byte, (len(b)+utf8.UTFMax)*2) 184 copy(nb, b[0:w]) 185 b = nb 186 } 187 switch c := s[r]; { 188 case c == '\\': 189 r++ 190 if r >= length { 191 return 192 } 193 switch s[r] { 194 default: 195 return 196 case '"', '\\', '/', '\'': 197 b[w] = s[r] 198 r++ 199 w++ 200 case 'b': 201 b[w] = '\b' 202 r++ 203 w++ 204 case 'f': 205 b[w] = '\f' 206 r++ 207 w++ 208 case 'n': 209 b[w] = '\n' 210 r++ 211 w++ 212 case 'r': 213 b[w] = '\r' 214 r++ 215 w++ 216 case 't': 217 b[w] = '\t' 218 r++ 219 w++ 220 case 'u': 221 r-- 222 rr := getu4(s[r:]) 223 if rr < 0 { 224 return 225 } 226 r += 6 227 if utf16.IsSurrogate(rr) { 228 rr1 := getu4(s[r:]) 229 if dec := utf16.DecodeRune(rr, rr1); dec != unicode.ReplacementChar { 230 // A valid pair; consume. 231 r += 6 232 w += utf8.EncodeRune(b[w:], dec) 233 break 234 } 235 // Invalid surrogate; fall back to replacement rune. 236 rr = unicode.ReplacementChar 237 } 238 w += utf8.EncodeRune(b[w:], rr) 239 } 240 241 // Quote, control characters are invalid. 242 case c == '"', c < ' ': 243 return 244 245 // ASCII 246 case c < utf8.RuneSelf: 247 b[w] = c 248 r++ 249 w++ 250 251 // Coerce to well-formed UTF-8. 252 default: 253 rr, size := utf8.DecodeRune(s[r:]) 254 r += size 255 w += utf8.EncodeRune(b[w:], rr) 256 } 257 } 258 return b[0:w], true 259 } 260 261 func getu4(s []byte) rune { 262 if len(s) < 6 || s[0] != '\\' || s[1] != 'u' { 263 return -1 264 } 265 var r rune 266 for _, c := range s[2:6] { 267 switch { 268 case '0' <= c && c <= '9': 269 c = c - '0' 270 case 'a' <= c && c <= 'f': 271 c = c - 'a' + 10 272 case 'A' <= c && c <= 'F': 273 c = c - 'A' + 10 274 default: 275 return -1 276 } 277 r = r*16 + rune(c) 278 } 279 return r 280 }